diff --git a/src/mapleall/maple_be/include/be/common_utils.h b/src/mapleall/maple_be/include/be/common_utils.h
index 77f7468f2ea90e4fe90b531c771d7054af0a3f51..0789de3c41804a5caddcc11f881e1b07468a708c 100644
--- a/src/mapleall/maple_be/include/be/common_utils.h
+++ b/src/mapleall/maple_be/include/be/common_utils.h
@@ -45,6 +45,7 @@ constexpr uint32 k1ByteSize = 1;
 constexpr uint32 k2ByteSize = 2;
 constexpr uint32 k4ByteSize = 4;
 constexpr uint32 k8ByteSize = 8;
+constexpr uint32 k12ByteSize = 12;
 constexpr uint32 k14ByteSize = 14;
 constexpr uint32 k15ByteSize = 15;
 constexpr uint32 k16ByteSize = 16;
@@ -69,6 +70,7 @@ constexpr int32 kInsnEighthOpnd = 7;
 /* Number of registers */
 constexpr uint32 kOneRegister = 1;
 constexpr uint32 kTwoRegister = 2;
+constexpr uint32 kFourRegister = 4;
 
 /* Size of struct for memcpy */
 constexpr uint32 kParmMemcpySize = 40;
diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_abi.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_abi.h
index 12511c58e6f477dad49dc12805eef463567e09d2..6e7c2dc397c4146bf43d04b764294160bc15afc7 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_abi.h
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_abi.h
@@ -64,8 +64,12 @@ enum AArch64ArgumentClass : uint8 {
 struct PLocInfo {
   AArch64reg reg0;  /* 0 means parameter is stored on the stack */
   AArch64reg reg1;
+  AArch64reg reg2;  /* up to four single-precision FP registers can be used */
+  AArch64reg reg3;  /* for a small pure-FP structure argument or return */
   int32 memOffset;
   int32 memSize;
+  uint32 fpSize;
+  uint32 numFpPureRegs;
 };
 
 /*
@@ -84,6 +88,7 @@ class ParmLocator {
   /* Return size of aggregate structure copy on stack. */
   int32 LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst = false);
+  int32 LocateRetVal(MIRType &retType, PLocInfo &ploc);
   void InitPLocInfo(PLocInfo &pLoc) const;
 
  private:
@@ -110,6 +115,35 @@ class ParmLocator {
     return (nextFloatRegNO < AArch64Abi::kNumFloatParmRegs) ?
        AArch64Abi::floatParmRegs[nextFloatRegNO++] : kRinvalid;
   }
 
+  inline void AllocateNSIMDFPRegisters(PLocInfo &ploc, uint32 num) {
+    if ((nextFloatRegNO + num - 1) < AArch64Abi::kNumFloatParmRegs) {
+      switch (num) {
+        case 1:
+          ploc.reg0 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          break;
+        case 2:
+          ploc.reg0 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          ploc.reg1 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          break;
+        case 3:
+          ploc.reg0 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          ploc.reg1 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          ploc.reg2 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          break;
+        case 4:
+          ploc.reg0 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          ploc.reg1 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          ploc.reg2 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          ploc.reg3 = AArch64Abi::floatParmRegs[nextFloatRegNO++];
+          break;
+        default:
+          CHECK_FATAL(0, "AllocateNSIMDFPRegisters: unsupported");
+      }
+    } else {
+      ploc.reg0 = kRinvalid;
+    }
+  }
+
   void RoundNGRNUpToNextEven() {
     nextGeneralRegNO = static_cast<int32>((nextGeneralRegNO + 1) & ~static_cast<int32>(1));
   }
@@ -140,6 +174,14 @@ class ReturnMechanism {
     return reg1;
   }
 
+  AArch64reg GetReg2() const {
+    return reg2;
+  }
+
+  AArch64reg GetReg3() const {
+    return reg3;
+  }
+
   void SetupToReturnThroughMemory() {
     regCount = 1;
     reg0 = R8;
@@ -151,6 +193,8 @@ class ReturnMechanism {
-  uint8 regCount;           /* number of registers <= 2 storing the return value */
+  uint8 regCount;           /* number of registers <= 4 storing the return value */
   AArch64reg reg0;          /* first register storing the return value */
   AArch64reg reg1;          /* second register storing the return value */
+  AArch64reg reg2;          /* third register storing the return value */
+  AArch64reg reg3;          /* fourth register storing the return value */
   PrimType primTypeOfReg0;  /* the primitive type stored in reg0 */
   PrimType primTypeOfReg1;  /* the primitive type stored in reg1 */
 };
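Note: reg2/reg3 exist because AAPCS64 passes and returns a homogeneous floating-point aggregate (HFA) — a struct whose fields all reduce to one FP type, with at most four members — in consecutive SIMD/FP registers, while every other small aggregate still uses at most two GP registers. A standalone C++ illustration of which layouts qualify (the struct names are invented for the example):

    #include <cstdint>

    struct Quad { float a, b, c, d; };    /* HFA: four floats -> s0-s3 */
    struct Pair { double x, y; };         /* HFA: two doubles -> d0-d1 */
    struct Mixed { float a; int32_t b; }; /* not an HFA: mixed field types -> GP regs */
    struct Wide { float v[5]; };          /* not an HFA: five members exceed four regs */

    static_assert(sizeof(Quad) == 16, "still within the 16-byte small-aggregate limit");
    static_assert(sizeof(Pair) == 16, "still within the 16-byte small-aggregate limit");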
diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_args.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_args.h
index 7ece92133e64656e387129c0e0ab74cc3ee30d4e..67e6f402c70e847604218423a06a74735ec6575e 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_args.h
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_args.h
@@ -29,6 +29,8 @@ struct ArgInfo {
   RegType regType;
   MIRSymbol *sym;
   const AArch64SymbolAlloc *symLoc;
+  uint8 memPairSecondRegSize;  /* struct arg requiring two regs: size of the 2nd reg's store */
+  bool doMemPairOpt;
 };
 
 class AArch64MoveRegArgs : public MoveRegArgs {
@@ -40,11 +42,15 @@ class AArch64MoveRegArgs : public MoveRegArgs {
  private:
   RegOperand *baseReg = nullptr;
   const MemSegment *lastSegment = nullptr;
-  void CollectRegisterArgs(std::map<uint32, AArch64reg> &argsList, std::vector<uint32> &indexList) const;
-  ArgInfo GetArgInfo(std::map<uint32, AArch64reg> &argsList, uint32 argIndex) const;
+  void CollectRegisterArgs(std::map<uint32, AArch64reg> &argsList, std::vector<uint32> &indexList,
+                           std::map<uint32, AArch64reg> &pairReg, std::vector<uint32> &numFpRegs,
+                           std::vector<uint32> &fpSize) const;
+  ArgInfo GetArgInfo(std::map<uint32, AArch64reg> &argsList, std::vector<uint32> &numFpRegs,
+                     std::vector<uint32> &fpSize, uint32 argIndex) const;
   bool IsInSameSegment(const ArgInfo &firstArgInfo, const ArgInfo &secondArgInfo) const;
+  void GenOneInsn(ArgInfo &argInfo, AArch64RegOperand &baseOpnd, uint32 stBitSize, AArch64reg dest, int32 offset);
   void GenerateStpInsn(const ArgInfo &firstArgInfo, const ArgInfo &secondArgInfo);
-  void GenerateStrInsn(ArgInfo &argInfo);
+  void GenerateStrInsn(ArgInfo &argInfo, AArch64reg reg2, uint32 numFpRegs, uint32 fpSize);
   void MoveRegisterArgs();
   void MoveVRegisterArgs();
   void MoveLocalRefVarToRefLocals(MIRSymbol &mirSym);
@@ -53,4 +59,4 @@ class AArch64MoveRegArgs : public MoveRegArgs {
 };
 }  /* namespace maplebe */
-#endif  /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ARGS_H */
\ No newline at end of file
+#endif  /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_ARGS_H */
diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
index a422d4f0e462946595376030907feefb9478d73c..50742c7d9fe575fb1eee08a7f9b3815b762c7ece 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
@@ -524,6 +524,12 @@ class AArch64CGFunc : public CGFunc {
     regno_t regNOCatch;  /* For O2. */
     Operand *opndCatch;  /* For O0-O1. */
   } uCatch;
+  enum fpParamState {
+    kNotFp,
+    kFp32Bit,
+    kFp64Bit,
+    kStateUnknown,
+  };
   Operand *rcc = nullptr;
   Operand *vary = nullptr;
   Operand *fsp = nullptr;  /* used to point the address of local variables and formal parameters */
@@ -561,11 +567,12 @@ class AArch64CGFunc : public CGFunc {
   }
 
   void CreateCallStructParamPassByStack(int32 symSize, MIRSymbol *sym, RegOperand *addrOpnd, int32 baseOffset);
-  void CreateCallStructParamPassByReg(AArch64reg reg, MemOperand &memOpnd, AArch64ListOperand &srcOpnds);
+  void CreateCallStructParamPassByReg(AArch64reg reg, MemOperand &memOpnd, AArch64ListOperand &srcOpnds,
+                                      fpParamState state);
   void CreateCallStructParamMemcpy(const MIRSymbol *sym, RegOperand *addropnd, uint32 structSize, int32 copyOffset,
                                    int32 fromOffset);
   AArch64RegOperand *CreateCallStructParamCopyToStack(uint32 numMemOp, MIRSymbol *sym, RegOperand *addropnd,
-                                                      int32 copyOffset, AArch64reg reg);
+                                                      int32 copyOffset, PLocInfo &pLoc);
   void SelectParmListDreadSmallAggregate(MIRSymbol &sym, MIRType &structType, AArch64ListOperand &srcOpnds,
                                          ParmLocator &parmLocator);
   void SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, AArch64ListOperand &srcOpnds,
diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_memlayout.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_memlayout.h
index 11c7b97b1ef0dc63dbb2e06e2d54f7e890cc0f9e..2d3e6b560ac7c65bf3ce0d441f1325c61afe563e 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_memlayout.h
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_memlayout.h
@@ -25,9 +25,11 @@ class AArch64SymbolAlloc : public SymbolAlloc {
 
   ~AArch64SymbolAlloc() = default;
 
-  void SetRegisters(AArch64reg r0, AArch64reg r1) {
+  void SetRegisters(AArch64reg r0, AArch64reg r1, AArch64reg r2, AArch64reg r3) {
     reg0 = r0;
     reg1 = r1;
+    reg2 = r2;
+    reg3 = r3;
   }
 
   inline bool IsRegister() {
@@ -37,6 +39,8 @@ class AArch64SymbolAlloc : public SymbolAlloc {
  private:
   AArch64reg reg0 = kRinvalid;
   AArch64reg reg1 = kRinvalid;
+  AArch64reg reg2 = kRinvalid;
+  AArch64reg reg3 = kRinvalid;
 };
 
 /*
diff --git a/src/mapleall/maple_be/src/be/becommon.cpp b/src/mapleall/maple_be/src/be/becommon.cpp
index 28b83f13749ee90eaf2daeaeab5777267a357156..b7ec3138e3dab35719204f69b790c3f2dc75383e 100644
--- a/src/mapleall/maple_be/src/be/becommon.cpp
+++ b/src/mapleall/maple_be/src/be/becommon.cpp
@@ -620,8 +620,7 @@ void BECommon::AddElementToJClassLayout(MIRClassType &klass, JClassFieldInfo inf
 }
 
 void BECommon::AddElementToFuncReturnType(MIRFunction &func, const TyIdx tyIdx) {
-  TyIdx &ty = funcReturnType.at(&func);
-  ty = tyIdx;
+  funcReturnType[&func] = tyIdx;
 }
 
 MIRType *BECommon::BeGetOrCreatePointerType(const MIRType &pointedType) {
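Note: the AddElementToFuncReturnType rewrite matters because std::map::at() throws std::out_of_range when the key is absent, so the old code could only overwrite an existing entry and failed on the first insertion for a function; operator[] default-constructs the missing slot before assigning. A minimal sketch of the difference:

    #include <cassert>
    #include <map>

    int main() {
      std::map<int, int> m;
      /* m.at(1) = 2; would throw std::out_of_range: key 1 does not exist yet */
      m[1] = 2;              /* operator[] inserts the entry, then assigns */
      assert(m.at(1) == 2);  /* at() is safe once the entry exists */
      return 0;
    }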
diff --git a/src/mapleall/maple_be/src/be/lower.cpp b/src/mapleall/maple_be/src/be/lower.cpp
index ea07f8115593da4991387ba1c619eefb61a5975a..ac9df02ab88bb2f2fe5a146e3bb088fd1ad243cd 100644
--- a/src/mapleall/maple_be/src/be/lower.cpp
+++ b/src/mapleall/maple_be/src/be/lower.cpp
@@ -1050,6 +1050,7 @@ BlockNode *CGLowerer::LowerCallAssignedStmt(StmtNode &stmt) {
       auto &origCall = static_cast<CallNode&>(stmt);
       newCall = GenCallNode(stmt, funcCalled, origCall);
       p2nRets = &origCall.GetReturnVec();
+      static_cast<CallNode*>(newCall)->SetReturnVec(*p2nRets);
       break;
     }
     case OP_intrinsiccallassigned:
@@ -1067,12 +1068,14 @@ BlockNode *CGLowerer::LowerCallAssignedStmt(StmtNode &stmt) {
       }
       newCall = GenIntrinsiccallNode(stmt, funcCalled, handledAtLowerLevel, intrincall);
       p2nRets = &intrincall.GetReturnVec();
+      static_cast<IntrinsiccallNode*>(newCall)->SetReturnVec(*p2nRets);
       break;
     }
     case OP_intrinsiccallwithtypeassigned: {
      auto &origCall = static_cast<IntrinsiccallNode&>(stmt);
      newCall = GenIntrinsiccallNode(stmt, funcCalled, handledAtLowerLevel, origCall);
      p2nRets = &origCall.GetReturnVec();
+     static_cast<IntrinsiccallNode*>(newCall)->SetReturnVec(*p2nRets);
      break;
     }
     case OP_icallassigned: {
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp
index a5a51309447fd72f6dc43d8aa34d8518b76dd15a..ebcda1c7a09ddf18b3e116a2d90a4f37b19e945f 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp
@@ -19,35 +19,7 @@ namespace maplebe {
 using namespace maple;
 
 namespace {
-constexpr int kMaxRegCount = 2;
-
-/*
- * return the class resulted from merging the two classes, based on rules
- * described by the ARM ABI
- */
-AArch64ArgumentClass MergeClasses(AArch64ArgumentClass class0, AArch64ArgumentClass class1) {
-  /*
-   * maybe return ( class0 | class1 ) would do if
-   * ( class0 != kAArch64MemoryClass && class1 != kAArch64MemoryClass ) always holds
-   */
-  if (class0 == class1) {
-    return class0;
-  }
-  if (class0 == kAArch64NoClass) {
-    return class1;
-  }
-  if (class1 == kAArch64NoClass) {
-    return class0;
-  }
-  if ((class0 == kAArch64MemoryClass) || (class1 == kAArch64MemoryClass)) {
-    return kAArch64MemoryClass;
-  }
-  if ((class0 == kAArch64IntegerClass) || (class1 == kAArch64IntegerClass)) {
-    return kAArch64IntegerClass;
-  }
-  ASSERT(false, "NYI");
-  return kAArch64NoClass;
-}
+constexpr int kMaxRegCount = 4;
 
 int32 ProcessNonStructAndNonArrayWhenClassifyAggregate(const MIRType &mirType,
                                                        AArch64ArgumentClass classes[kMaxRegCount],
@@ -84,95 +56,95 @@ int32 ProcessNonStructAndNonArrayWhenClassifyAggregate(const MIRType &mirType,
   return 0;
 }
 
-void ProcessNonUnionWhenClassifyAggregate(const BECommon &be, const MIRType &fieldType, uint32 &fldBofst,
-                                          uint64 &allocedSize, uint64 &allocedSizeInBits) {
-  /* determine fld_bofst for this field */
-  uint64 fieldTypeSize = be.GetTypeSize(fieldType.GetTypeIndex());
-  ASSERT(fieldTypeSize != 0, "fieldTypeSize should not be 0");
-  uint8 fieldAlign = be.GetTypeAlign(fieldType.GetTypeIndex());
-  ASSERT(fieldAlign != 0, "fieldAlign should not be 0");
-  if (fieldType.GetKind() == kTypeBitField) {
-    uint32 fieldSize = static_cast<const MIRBitFieldType&>(fieldType).GetFieldSize();
-    if ((allocedSizeInBits / (fieldAlign * k8ByteSize)) !=
-        ((allocedSizeInBits + fieldSize - 1u) / (fieldAlign * k8ByteSize))) {
-      /*
-       * the field is crossing the align boundary of its base type;
-       * align alloced_size_in_bits to fieldAlign
-       */
-      allocedSizeInBits = RoundUp(allocedSizeInBits, fieldAlign * k8ByteSize);
+PrimType TraverseStructFieldsForFp(MIRType *ty, uint32 &numRegs) {
+  if (ty->GetKind() == kTypeArray) {
+    MIRArrayType *arrtype = static_cast<MIRArrayType*>(ty);
+    MIRType *pty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(arrtype->GetElemTyIdx());
+    if (pty->GetKind() == kTypeArray || pty->GetKind() == kTypeStruct) {
+      return TraverseStructFieldsForFp(pty, numRegs);
+    }
+    for (uint32 i = 0; i < arrtype->GetDim(); ++i) {
+      numRegs += arrtype->GetSizeArrayItem(i);
+    }
+    return pty->GetPrimType();
+  } else if (ty->GetKind() == kTypeStruct) {
+    MIRStructType *sttype = static_cast<MIRStructType*>(ty);
+    FieldVector fields = sttype->GetFields();
+    PrimType oldtype = PTY_void;
+    for (uint32 fcnt = 0; fcnt < fields.size(); ++fcnt) {
+      TyIdx fieldtyidx = fields[fcnt].second.first;
+      MIRType *fieldty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(fieldtyidx);
+      PrimType ptype = TraverseStructFieldsForFp(fieldty, numRegs);
+      if (oldtype != PTY_void && oldtype != ptype) {
+        return PTY_void;
+      } else {
+        oldtype = ptype;
+      }
     }
-    /* allocate the bitfield */
-    fldBofst = allocedSizeInBits;
-    allocedSizeInBits += fieldSize;
-    allocedSize = std::max(allocedSize, RoundUp(allocedSizeInBits, fieldAlign * k8ByteSize) / k8ByteSize);
+    return oldtype;
   } else {
-    /* pad alloced_size according to the field alignment */
-    allocedSize = RoundUp(allocedSize, fieldAlign);
-    fldBofst = allocedSize * k8ByteSize;
-    allocedSize += fieldTypeSize;
-    allocedSizeInBits = allocedSize * k8ByteSize;
+    numRegs++;
+    return ty->GetPrimType();
   }
 }
 
 int32 ClassifyAggregate(BECommon &be, MIRType &mirType, AArch64ArgumentClass classes[kMaxRegCount],
-                        size_t classesLength);
+                        size_t classesLength, uint32 &fpSize);
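Note: TraverseStructFieldsForFp folds a possibly nested aggregate down to one scalar type, returning PTY_void as soon as two different leaf types meet, and counts the leaves so the caller knows how many registers a pure-FP aggregate needs. A self-contained model of the same recursion over a toy type tree (the Ty/Fold names are illustrative, not compiler API):

    #include <cassert>
    #include <vector>

    enum Prim { kVoid, kF32, kF64, kI32 };
    struct Ty { Prim prim; std::vector<Ty> fields; };  /* empty fields => scalar leaf */

    /* Common scalar type of all leaves, or kVoid on a mismatch; counts the leaves. */
    Prim Fold(const Ty &ty, unsigned &numRegs) {
      if (ty.fields.empty()) {
        ++numRegs;
        return ty.prim;
      }
      Prim common = kVoid;
      for (const Ty &f : ty.fields) {
        Prim p = Fold(f, numRegs);
        if (common != kVoid && common != p) {
          return kVoid;  /* mixed leaf types: not a pure-FP aggregate */
        }
        common = p;
      }
      return common;
    }

    int main() {
      unsigned n = 0;
      Ty hfa{kVoid, {{kF32, {}}, {kF32, {}}, {kF32, {}}}};  /* struct of three floats */
      assert(Fold(hfa, n) == kF32 && n == 3);               /* needs s0, s1, s2 */
      n = 0;
      Ty mixed{kVoid, {{kF32, {}}, {kI32, {}}}};
      assert(Fold(mixed, n) == kVoid);  /* falls back to integer classification */
      return 0;
    }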
-void ProcessStructWhenClassifyAggregate(BECommon &be, MIRStructType &structType, int32 &subNumRegs,
-                                        AArch64ArgumentClass classes[kMaxRegCount],
-                                        size_t classesLength) {
+uint32 ProcessStructWhenClassifyAggregate(BECommon &be, MIRStructType &structType,
+                                          AArch64ArgumentClass classes[kMaxRegCount],
+                                          size_t classesLength, uint32 &fpSize) {
   CHECK_FATAL(classesLength > 0, "classLength must > 0");
   int32 sizeOfTyInDwords = RoundUp(be.GetTypeSize(structType.GetTypeIndex()), k8ByteSize) >> k8BitShift;
-  AArch64ArgumentClass subClasses[kMaxRegCount];
-  uint32 fldBofst = 0;  /* offset of field in bits within immediate struct */
-  uint64 allocedSize = 0;
-  uint64 allocedSizeInBits = 0;
+  bool isF32 = false;
+  bool isF64 = false;
+  uint32 numRegs = 0;
   for (uint32 f = 0; f < structType.GetFieldsSize(); ++f) {
     TyIdx fieldTyIdx = structType.GetFieldsElemt(f).second.first;
     MIRType *fieldType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(fieldTyIdx);
-    subNumRegs = ClassifyAggregate(be, *fieldType, subClasses, sizeof(subClasses) / sizeof(AArch64ArgumentClass));
-    ASSERT(subNumRegs > 0, "expect subNumRegs > 0");  /* we come here when the total size < 16? */
-    if (subNumRegs == 0) {
-      return;
-    }
-    if (structType.GetKind() != kTypeUnion) {
-      ProcessNonUnionWhenClassifyAggregate(be, (*fieldType), fldBofst, allocedSize, allocedSizeInBits);
+    PrimType pType = TraverseStructFieldsForFp(fieldType, numRegs);
+    if (pType == PTY_f32) {
+      if (isF64) {
+        isF64 = false;
+        break;
+      }
+      isF32 = true;
+    } else if (pType == PTY_f64) {
+      if (isF32) {
+        isF32 = false;
+        break;
+      }
+      isF64 = true;
     } else {
-      /*
-       * for unions, bitfields are treated as non-bitfields;
-       * the parent aggregate is a union, so why are we increasing alloced_size?
-       * this would alter the next field's bit offset?
-       */
-      uint64 fieldTypeSize = be.GetTypeSize(fieldType->GetTypeIndex());
-      ASSERT(fieldTypeSize != 0, "fieldTypeSize should not be 0");
-      fldBofst = allocedSize * k8ByteSize;
-      allocedSize = std::max(allocedSize, fieldTypeSize);
-    }
-    /* merge subClasses into classes */
-    int32 idx = fldBofst >> 6;  /* index into the struct in doublewords */
-    ASSERT(idx > 0, "expect idx > 0");
-    ASSERT(idx < kMaxRegCount, "expect idx < kMaxRegCount");
-    ASSERT(subNumRegs == 1, "subNumRegs should be equal to 1");
-    ASSERT(subClasses[0] != kAArch64MemoryClass, "expect a kAArch64MemoryClass");
-    for (int32 i = 0; i < subNumRegs; ++i) {
-      classes[i + idx] = MergeClasses(classes[i + idx], subClasses[i]);
+      isF32 = isF64 = false;
+      break;
     }
   }
-  if (subNumRegs < sizeOfTyInDwords) {
-    for (int32 i = 1; i < sizeOfTyInDwords; ++i) {
-      if (classes[i] == kAArch64NoClass) {
-        classes[i] = classes[i - 1];
-      }
+  if (isF32 || isF64) {
+    for (uint32 i = 0; i < numRegs; ++i) {
+      classes[i] = kAArch64FloatClass;
     }
+    fpSize = isF32 ? k4ByteSize : k8ByteSize;
+    return numRegs;
   }
+
+  classes[0] = kAArch64IntegerClass;
+  if (sizeOfTyInDwords == 2) {
+    classes[1] = kAArch64IntegerClass;
+  }
+  return sizeOfTyInDwords;
 }
 
-void ProcessArrayWhenClassifyAggregate(BECommon &be, const MIRArrayType &mirArrayType, int32 &subNumRegs,
+void ProcessArrayWhenClassifyAggregate(BECommon &be, const MIRArrayType &mirArrayType,
                                        AArch64ArgumentClass classes[kMaxRegCount], size_t classesLength) {
+  CHECK_FATAL(0, "Should not be here");
   CHECK_FATAL(classesLength > 0, "classLength must > 0");
   int32 sizeOfTyInDwords = RoundUp(be.GetTypeSize(mirArrayType.GetTypeIndex()), k8ByteSize) >> k8BitShift;
   AArch64ArgumentClass subClasses[kMaxRegCount];
+  uint32 subNumRegs;
+  uint32 fpSize;
   subNumRegs = ClassifyAggregate(be, *(GlobalTables::GetTypeTable().GetTypeFromTyIdx(mirArrayType.GetElemTyIdx())),
-                                 subClasses, sizeof(subClasses) / sizeof(AArch64ArgumentClass));
+                                 subClasses, sizeof(subClasses) / sizeof(AArch64ArgumentClass), fpSize);
   CHECK_FATAL(subNumRegs == 1, "subnumregs should be equal to 1");
   for (int32 i = 0; i < sizeOfTyInDwords; ++i) {
     classes[i] = subClasses[i];
@@ -186,7 +158,7 @@ void ProcessArrayWhenClassifyAggregate(BECommon &be, const MIRArrayType &mirArra
  * means the whole aggregate is passed in memory.
  */
 int32 ClassifyAggregate(BECommon &be, MIRType &mirType, AArch64ArgumentClass classes[kMaxRegCount],
-                        size_t classesLength) {
+                        size_t classesLength, uint32 &fpSize) {
   CHECK_FATAL(classesLength > 0, "invalid index");
   uint64 sizeOfTy = be.GetTypeSize(mirType.GetTypeIndex());
   /* Rule B.3.
@@ -206,7 +178,7 @@ int32 ClassifyAggregate(BECommon &be, MIRType &mirType, AArch64ArgumentClass cla
    */
   int32 sizeOfTyInDwords = RoundUp(sizeOfTy, k8ByteSize) >> k8BitShift;
   ASSERT(sizeOfTyInDwords > 0, "sizeOfTyInDwords should be sizeOfTyInDwords > 0");
-  ASSERT(sizeOfTyInDwords <= kMaxRegCount, "sizeOfTyInDwords should be sizeOfTyInDwords <= kMaxRegCount");
+  ASSERT(sizeOfTyInDwords <= kTwoRegister, "sizeOfTyInDwords should be <= 2");
   int32 i;
   for (i = 0; i < sizeOfTyInDwords; ++i) {
     classes[i] = kAArch64NoClass;
@@ -214,17 +186,13 @@ int32 ClassifyAggregate(BECommon &be, MIRType &mirType, AArch64ArgumentClass cla
   if ((mirType.GetKind() != kTypeStruct) && (mirType.GetKind() != kTypeArray) && (mirType.GetKind() != kTypeUnion)) {
     return ProcessNonStructAndNonArrayWhenClassifyAggregate(mirType, classes, classesLength);
   }
-  int32 subNumRegs;
   if (mirType.GetKind() == kTypeStruct) {
     MIRStructType &structType = static_cast<MIRStructType&>(mirType);
-    ProcessStructWhenClassifyAggregate(be, structType, subNumRegs, classes, classesLength);
-    if (subNumRegs == 0) {
-      return 0;
-    }
+    return ProcessStructWhenClassifyAggregate(be, structType, classes, classesLength, fpSize);
   } else {
     /* mirType->_kind == TYPE_ARRAY */
     auto &mirArrayType = static_cast<MIRArrayType&>(mirType);
-    ProcessArrayWhenClassifyAggregate(be, mirArrayType, subNumRegs, classes, classesLength);
+    ProcessArrayWhenClassifyAggregate(be, mirArrayType, classes, classesLength);
   }
   /* post merger clean-up */
   for (i = 0; i < sizeOfTyInDwords; ++i) {
@@ -364,7 +332,45 @@ bool IsSpillRegInRA(AArch64reg regNO, bool has3RegOpnd) {
 void ParmLocator::InitPLocInfo(PLocInfo &pLoc) const {
   pLoc.reg0 = kRinvalid;
   pLoc.reg1 = kRinvalid;
+  pLoc.reg2 = kRinvalid;
+  pLoc.reg3 = kRinvalid;
   pLoc.memOffset = nextStackArgAdress;
+  pLoc.fpSize = 0;
+  pLoc.numFpPureRegs = 0;
+}
+
+int32 ParmLocator::LocateRetVal(MIRType &retType, PLocInfo &pLoc) {
+  InitPLocInfo(pLoc);
+  int32 retSize = beCommon.GetTypeSize(retType.GetTypeIndex().GetIdx());
+  if (retSize == 0) {
+    return 0;  /* size-0 return value */
+  }
+  if (retSize <= k16ByteSize) {
+    /* A return struct of at most 16 bytes is returned in register pairs. */
+    AArch64ArgumentClass classes[kMaxRegCount];  /* max of four floats */
+    uint32 fpSize;
+    int32 numRegs = ClassifyAggregate(beCommon, retType, classes, sizeof(classes), fpSize);
+    if (classes[0] == kAArch64FloatClass) {
+      CHECK_FATAL(numRegs <= kMaxRegCount, "LocateRetVal: illegal number of regs");
+      AllocateNSIMDFPRegisters(pLoc, numRegs);
+      pLoc.numFpPureRegs = numRegs;
+      pLoc.fpSize = fpSize;
+      return 0;
+    } else {
+      CHECK_FATAL(numRegs <= kTwoRegister, "LocateRetVal: illegal number of regs");
+      if (numRegs == kOneRegister) {
+        pLoc.reg0 = AllocateGPRegister();
+      } else {
+        AllocateTwoGPRegisters(pLoc);
+      }
+      return 0;
+    }
+  } else {
+    /* For a return struct larger than 16 bytes, the result pointer is passed in x8. */
+    pLoc.reg0 = R8;
+    return kSizeOfPtr;
+  }
 }
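Note: LocateRetVal now distinguishes three return shapes: a pure-FP aggregate in up to four SIMD/FP registers, any other aggregate of at most 16 bytes in one or two GP registers, and anything larger via a pointer in x8. Example types and their expected placement (struct names invented for illustration):

    struct F3 { float a, b, c; };  /* 12 bytes, pure float  -> s0, s1, s2             */
    struct D2 { double x, y; };    /* 16 bytes, pure double -> d0, d1                 */
    struct I2 { long a; int b; };  /* 16 bytes, integer     -> x0, x1                 */
    struct Big { char buf[24]; };  /* > 16 bytes -> caller-provided buffer via x8     */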
@@ -397,8 +403,29 @@ int32 ParmLocator::LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst
       return kSizeOfPtr;
     }
     /* For return struct size less or equal to 16 bytes, the values
-     * are returned in register pairs. Do nothing here.
+     * are returned in register pairs.
+     * Check for a pure float struct.
      */
+    AArch64ArgumentClass classes[kMaxRegCount];
+    uint32 fpSize;
+    MIRType *retType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(beCommon.GetFuncReturnType(*func));
+    int32 numRegs = ClassifyAggregate(beCommon, *retType, classes, sizeof(classes), fpSize);
+    if (classes[0] == kAArch64FloatClass) {
+      CHECK_FATAL(numRegs <= kMaxRegCount, "LocateNextParm: illegal number of regs");
+      AllocateNSIMDFPRegisters(pLoc, numRegs);
+      pLoc.numFpPureRegs = numRegs;
+      pLoc.fpSize = fpSize;
+      return 0;
+    } else {
+      CHECK_FATAL(numRegs <= kTwoRegister, "LocateNextParm: illegal number of regs");
+      if (numRegs == kOneRegister) {
+        pLoc.reg0 = AllocateGPRegister();
+      } else {
+        AllocateTwoGPRegisters(pLoc);
+      }
+      return 0;
+    }
+  }
 }
 
   uint64 typeSize = beCommon.GetTypeSize(mirType.GetTypeIndex());
@@ -491,11 +518,12 @@ int32 ParmLocator::ProcessPtyAggWhenLocateNextParm(MIRType &mirType, PLocInfo &p
    * In AArch64, integer-float or float-integer
    * argument passing is not allowed. All should go through
    * integer-integer.
+   * The exception is a struct homogeneously composed of one FP type,
+   * either all float or all double: it can be passed in FP registers.
    */
   AArch64ArgumentClass classes[kMaxRegCount] = { kAArch64NoClass };
 #if DEBUG
   int32 saveIntParmNum = nextGeneralRegNO;
-  int32 saveFloatParmNum = nextFloatRegNO;
 #endif
   typeSize = beCommon.GetTypeSize(mirType.GetTypeIndex().GetIdx());
   int32 aggCopySize = 0;
@@ -511,13 +539,19 @@ int32 ParmLocator::ProcessPtyAggWhenLocateNextParm(MIRType &mirType, PLocInfo &p
     RoundNGRNUpToNextEven();
   }
 
-  int32 numRegs = ClassifyAggregate(beCommon, mirType, classes, sizeof(classes) / sizeof(AArch64ArgumentClass));
-  if (numRegs == 1) {
+  uint32 fpSize;
+  int32 numRegs = ClassifyAggregate(beCommon, mirType, classes,
+                                    sizeof(classes) / sizeof(AArch64ArgumentClass), fpSize);
+  if (classes[0] == kAArch64FloatClass) {
+    CHECK_FATAL(numRegs <= kMaxRegCount, "LocateNextParm: illegal number of regs");
+    typeSize = k8ByteSize;
+    AllocateNSIMDFPRegisters(pLoc, numRegs);
+    pLoc.numFpPureRegs = numRegs;
+    pLoc.fpSize = fpSize;
+  } else if (numRegs == 1) {
     /* passing in registers */
     typeSize = k8ByteSize;
     if (classes[0] == kAArch64FloatClass) {
-      pLoc.reg0 = AllocateSIMDFPRegister();
-      ASSERT(nextFloatRegNO == saveFloatParmNum, "RegNo should be saved pramRegNO");
+      CHECK_FATAL(0, "param passing in FP reg not allowed here");
     } else {
       pLoc.reg0 = AllocateGPRegister();
       ASSERT(nextGeneralRegNO == saveIntParmNum, "RegNo should be saved pramRegNO");
@@ -525,7 +559,7 @@ int32 ParmLocator::ProcessPtyAggWhenLocateNextParm(MIRType &mirType, PLocInfo &p
       ASSERT((pLoc.reg0 != kRinvalid) || (nextGeneralRegNO == AArch64Abi::kNumIntParmRegs),
              "reg0 should not be kRinvalid or nextGeneralRegNO should equal kNumIntParmRegs");
     }
-  } else if (numRegs == kMaxRegCount) {
+  } else if (numRegs == 2) {
     ASSERT(classes[0] == kAArch64IntegerClass, "class 0 must be integer class");
     ASSERT(classes[1] == kAArch64IntegerClass, "class 1 must be integer class");
     AllocateTwoGPRegisters(pLoc);
@@ -645,10 +679,31 @@ ReturnMechanism::ReturnMechanism(MIRType &retTy, BECommon &be)
     SetupToReturnThroughMemory();
     return;
   }
+  uint32 fpSize;
   AArch64ArgumentClass classes[kMaxRegCount];
   regCount = static_cast<uint8>(ClassifyAggregate(be, retTy, classes,
-                                                  sizeof(classes) / sizeof(AArch64ArgumentClass)));
-  if (regCount == 0) {
+                                                  sizeof(classes) / sizeof(AArch64ArgumentClass), fpSize));
+  if (classes[0] == kAArch64FloatClass) {
+    switch (regCount) {
+      case 4:
+        reg3 = AArch64Abi::floatReturnRegs[3];
+        /* fall through */
+      case 3:
+        reg2 = AArch64Abi::floatReturnRegs[2];
+        /* fall through */
+      case 2:
+        reg1 = AArch64Abi::floatReturnRegs[1];
+        /* fall through */
+      case 1:
+        reg0 = AArch64Abi::floatReturnRegs[0];
+        break;
+      default:
+        CHECK_FATAL(0, "ReturnMechanism: unsupported");
+    }
+    if (fpSize == k4ByteSize) {
+      primTypeOfReg0 = primTypeOfReg1 = PTY_f32;
+    } else {
+      primTypeOfReg0 = primTypeOfReg1 = PTY_f64;
+    }
+    return;
+  } else if (regCount == 0) {
     SetupToReturnThroughMemory();
     return;
   } else {
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp
index 35a496d4da0f4ef3f553c82ae7f7aa7a68499057..25142cd610ffd1549528238a0e85275aa2fe40e7 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp
@@ -25,11 +25,27 @@ void AArch64MoveRegArgs::Run() {
 }
 
-void AArch64MoveRegArgs::CollectRegisterArgs(std::map<uint32, AArch64reg> &argsList,
-                                             std::vector<uint32> &indexList) const {
+void AArch64MoveRegArgs::CollectRegisterArgs(std::map<uint32, AArch64reg> &argsList,
+                                             std::vector<uint32> &indexList,
+                                             std::map<uint32, AArch64reg> &pairReg,
+                                             std::vector<uint32> &numFpRegs,
+                                             std::vector<uint32> &fpSize) const {
   AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
+  uint32 numFormal = aarchCGFunc->GetFunction().GetFormalCount();
+  numFpRegs.resize(numFormal);
+  fpSize.resize(numFormal);
   ParmLocator parmlocator(aarchCGFunc->GetBecommon());
   PLocInfo ploc;
-  for (uint32 i = 0; i < aarchCGFunc->GetFunction().GetFormalCount(); ++i) {
+  uint32 start = 0;
+  if (numFormal) {
+    MIRFunction *func = const_cast<MIRFunction*>(aarchCGFunc->GetBecommon().GetMIRModule().CurFunction());
+    if (aarchCGFunc->GetBecommon().HasFuncReturnType(*func)) {
+      TyIdx tyIdx = aarchCGFunc->GetBecommon().GetFuncReturnType(*func);
+      if (aarchCGFunc->GetBecommon().GetTypeSize(tyIdx) <= k16ByteSize) {
+        start = 1;
+      }
+    }
+  }
+  for (uint32 i = start; i < numFormal; ++i) {
     MIRType *ty = aarchCGFunc->GetFunction().GetNthParamType(i);
     parmlocator.LocateNextParm(*ty, ploc, i == 0);
     if (ploc.reg0 == kRinvalid) {
@@ -45,23 +61,74 @@ void AArch64MoveRegArgs::CollectRegisterArgs(std::map<uint32, AArch64reg> &argsL
     if (ploc.reg1 == kRinvalid) {
       continue;
     }
+    if (ploc.numFpPureRegs) {
+      uint32 index = indexList.size() - 1;
+      numFpRegs[index] = ploc.numFpPureRegs;
+      fpSize[index] = ploc.fpSize;
+      continue;
+    }
     aarchCGFunc->PushElemIntoFormalRegList(ploc.reg1);
+    pairReg[i] = ploc.reg1;
+    if (ploc.reg2 == kRinvalid) {
+      continue;
+    }
+    aarchCGFunc->PushElemIntoFormalRegList(ploc.reg2);
+    if (ploc.reg3 == kRinvalid) {
+      continue;
+    }
+    aarchCGFunc->PushElemIntoFormalRegList(ploc.reg3);
   }
 }
 
-ArgInfo AArch64MoveRegArgs::GetArgInfo(std::map<uint32, AArch64reg> &argsList, uint32 argIndex) const {
+ArgInfo AArch64MoveRegArgs::GetArgInfo(std::map<uint32, AArch64reg> &argsList, std::vector<uint32> &numFpRegs,
+                                       std::vector<uint32> &fpSize, uint32 argIndex) const {
   AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
   ArgInfo argInfo;
   argInfo.reg = argsList[argIndex];
   argInfo.mirTy = aarchCGFunc->GetFunction().GetNthParamType(argIndex);
   argInfo.symSize = aarchCGFunc->GetBecommon().GetTypeSize(argInfo.mirTy->GetTypeIndex());
-  argInfo.stkSize = (argInfo.symSize < k4ByteSize) ? k4ByteSize : argInfo.symSize;
+  argInfo.memPairSecondRegSize = 0;
+  argInfo.doMemPairOpt = false;
+  if ((argInfo.symSize > k8ByteSize) && (argInfo.symSize <= k16ByteSize)) {
+    if (numFpRegs[argIndex] > kOneRegister) {
+      argInfo.symSize = argInfo.stkSize = fpSize[argIndex];
+    } else {
+      if (argInfo.symSize > k12ByteSize) {
+        argInfo.memPairSecondRegSize = k8ByteSize;
+      } else {
+        /* Round the second register's store up to a 4-byte word. */
+        argInfo.memPairSecondRegSize = k4ByteSize;
+      }
+      argInfo.doMemPairOpt = true;
+      argInfo.symSize = argInfo.stkSize = kSizeOfPtr;
+    }
+  } else if (argInfo.symSize > k16ByteSize) {
+    /* For large struct passing, a pointer to the copy is used. */
+    argInfo.symSize = argInfo.stkSize = kSizeOfPtr;
+  }
   if ((argInfo.mirTy->GetPrimType() == PTY_agg) && (argInfo.symSize < k4ByteSize)) {
+    /* For a small aggregate parameter, widen to the 4-byte minimum. */
+    argInfo.symSize = argInfo.stkSize = k4ByteSize;
+  } else if (numFpRegs[argIndex] > kOneRegister) {
+    argInfo.symSize = argInfo.stkSize = fpSize[argIndex];
+  } else {
+    argInfo.stkSize = (argInfo.symSize < k4ByteSize) ? k4ByteSize : argInfo.symSize;
+    if (argInfo.symSize > k4ByteSize) {
+      argInfo.symSize = k8ByteSize;
+    }
+  }
+
   argInfo.regType = (argInfo.reg < V0) ? kRegTyInt : kRegTyFloat;
   argInfo.sym = aarchCGFunc->GetFunction().GetFormal(argIndex);
   CHECK_NULL_FATAL(argInfo.sym);
   argInfo.symLoc =
       static_cast<const AArch64SymbolAlloc*>(aarchCGFunc->GetMemlayout()->GetSymAllocInfo(argInfo.sym->GetStIndex()));
   CHECK_NULL_FATAL(argInfo.symLoc);
+  if (argInfo.doMemPairOpt && (aarchCGFunc->GetBaseOffset(*(argInfo.symLoc)) & 0x7)) {
+    /* Do not optimize into a paired store when the base offset is unaligned;
+     * the symbol still occupies two parameter registers, so two separate stores are generated. */
+    argInfo.symSize = kSizeOfPtr;
+    argInfo.doMemPairOpt = false;
+  }
   return argInfo;
 }
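Note: for a struct of 9-16 bytes arriving in two GP registers, GetArgInfo sizes the second store to cover only the tail beyond the first 8 bytes: a full doubleword when the struct is larger than 12 bytes, otherwise a word (a 1-4 byte tail rounds up to 4). A minimal model of that computation:

    #include <cassert>
    #include <cstdint>

    /* Store size for the second register of a 9-16 byte struct argument. */
    uint32_t SecondRegStoreSize(uint32_t structSize) {
      assert(structSize > 8 && structSize <= 16);
      return (structSize > 12) ? 8u : 4u;  /* k8ByteSize / k4ByteSize above */
    }

    int main() {
      assert(SecondRegStoreSize(12) == 4);  /* str x?, then str w?: 8 + 4 bytes */
      assert(SecondRegStoreSize(16) == 8);  /* str x?, then str x?: 8 + 8 bytes */
      return 0;
    }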
@@ -122,7 +189,23 @@ void AArch64MoveRegArgs::GenerateStpInsn(const ArgInfo &firstArgInfo, const ArgI
   aarchCGFunc->GetCurBB()->AppendInsn(pushInsn);
 }
 
-void AArch64MoveRegArgs::GenerateStrInsn(ArgInfo &argInfo) {
+void AArch64MoveRegArgs::GenOneInsn(ArgInfo &argInfo, AArch64RegOperand &baseOpnd, uint32 stBitSize, AArch64reg dest,
+                                    int32 offset) {
+  AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
+  MOperator mOp = aarchCGFunc->PickStInsn(stBitSize, argInfo.mirTy->GetPrimType());
+  RegOperand &regOpnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(dest, stBitSize, argInfo.regType);
+
+  AArch64OfstOperand &offsetOpnd = aarchCGFunc->CreateOfstOpnd(offset, k32BitSize);
+  MemOperand *memOpnd = aarchCGFunc->GetMemoryPool()->New<AArch64MemOperand>(AArch64MemOperand::kAddrModeBOi,
+                                                                             stBitSize, baseOpnd, nullptr,
+                                                                             &offsetOpnd, argInfo.sym);
+  Insn &insn = aarchCGFunc->GetCG()->BuildInstruction<AArch64Insn>(mOp, regOpnd, *memOpnd);
+  if (aarchCGFunc->GetCG()->GenerateVerboseCG()) {
+    insn.SetComment(std::string("store param: ").append(argInfo.sym->GetName()));
+  }
+  aarchCGFunc->GetCurBB()->AppendInsn(insn);
+}
+
+void AArch64MoveRegArgs::GenerateStrInsn(ArgInfo &argInfo, AArch64reg reg2, uint32 numFpRegs, uint32 fpSize) {
   AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
   int32 stOffset = aarchCGFunc->GetBaseOffset(*argInfo.symLoc);
   AArch64RegOperand *baseOpnd = static_cast<AArch64RegOperand*>(aarchCGFunc->GetBaseReg(*argInfo.symLoc));
@@ -155,6 +238,24 @@ void AArch64MoveRegArgs::GenerateStrInsn(ArgInfo &argInfo) {
     insn.SetComment(std::string("store param: ").append(argInfo.sym->GetName()));
   }
   aarchCGFunc->GetCurBB()->AppendInsn(insn);
+
+  if (argInfo.doMemPairOpt) {
+    /* Store the second half of a struct passed in a register pair. */
+    uint32 part2BitSize = argInfo.memPairSecondRegSize * kBitsPerByte;
+    GenOneInsn(argInfo, *baseOpnd, part2BitSize, reg2, (stOffset + kSizeOfPtr));
+  } else if (numFpRegs > 1) {
+    uint32 fpSizeBits = fpSize * kBitsPerByte;
+    AArch64reg regFp2 = static_cast<AArch64reg>(static_cast<uint32>(argInfo.reg) + 1);
+    GenOneInsn(argInfo, *baseOpnd, fpSizeBits, regFp2, (stOffset + fpSize));
+    if (numFpRegs > 2) {
+      AArch64reg regFp3 = static_cast<AArch64reg>(static_cast<uint32>(argInfo.reg) + 2);
+      GenOneInsn(argInfo, *baseOpnd, fpSizeBits, regFp3, (stOffset + (fpSize * 2)));
+    }
+    if (numFpRegs > 3) {
+      AArch64reg regFp4 = static_cast<AArch64reg>(static_cast<uint32>(argInfo.reg) + 3);
+      GenOneInsn(argInfo, *baseOpnd, fpSizeBits, regFp4, (stOffset + (fpSize * 3)));
+    }
+  }
 }
 
 void AArch64MoveRegArgs::MoveRegisterArgs() {
@@ -165,18 +266,26 @@ void AArch64MoveRegArgs::MoveRegisterArgs() {
 
   std::map<uint32, AArch64reg> movePara;
   std::vector<uint32> moveParaIndex;
-  CollectRegisterArgs(movePara, moveParaIndex);
+  std::map<uint32, AArch64reg> pairReg;
+  std::vector<uint32> numFpRegs;
+  std::vector<uint32> fpSize;
+  CollectRegisterArgs(movePara, moveParaIndex, pairReg, numFpRegs, fpSize);
 
   std::vector<uint32>::iterator it;
   std::vector<uint32>::iterator next;
   for (it = moveParaIndex.begin(); it != moveParaIndex.end(); ++it) {
     uint32 firstIndex = *it;
-    ArgInfo firstArgInfo = GetArgInfo(movePara, firstIndex);
+    ArgInfo firstArgInfo = GetArgInfo(movePara, numFpRegs, fpSize, firstIndex);
     next = it;
     ++next;
-    if (next != moveParaIndex.end()) {
-      uint32 secondIndex = *next;
-      ArgInfo secondArgInfo = GetArgInfo(movePara, secondIndex);
+    if ((next != moveParaIndex.end()) || (firstArgInfo.doMemPairOpt)) {
+      uint32 secondIndex = (firstArgInfo.doMemPairOpt) ? firstIndex : *next;
+      ArgInfo secondArgInfo = GetArgInfo(movePara, numFpRegs, fpSize, secondIndex);
+      secondArgInfo.reg = (firstArgInfo.doMemPairOpt) ? pairReg[firstIndex] : movePara[secondIndex];
+      secondArgInfo.symSize = (firstArgInfo.doMemPairOpt) ? firstArgInfo.memPairSecondRegSize : secondArgInfo.symSize;
+      secondArgInfo.symLoc = (firstArgInfo.doMemPairOpt) ? secondArgInfo.symLoc :
+          static_cast<const AArch64SymbolAlloc*>(aarchCGFunc->GetMemlayout()->GetSymAllocInfo(
+              secondArgInfo.sym->GetStIndex()));
       /* Make sure they are in same segment if want to use stp */
       if (IsInSameSegment(firstArgInfo, secondArgInfo)) {
         GenerateStpInsn(firstArgInfo, secondArgInfo);
@@ -184,7 +293,7 @@ void AArch64MoveRegArgs::MoveRegisterArgs() {
         continue;
       }
     }
-    GenerateStrInsn(firstArgInfo);
+    GenerateStrInsn(firstArgInfo, pairReg[firstIndex], numFpRegs[firstIndex], fpSize[firstIndex]);
   }
 
   aarchCGFunc->GetFirstBB()->InsertAtBeginning(*aarchCGFunc->GetDummyBB());
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
index 922b06e567f62164b9ad06af79c51aa9f81838bb..33f5d9cf243b86640ac0259a65ecc0311f4bd7b4 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
@@ -1073,17 +1073,37 @@ void AArch64CGFunc::SelectAggDassign(DassignNode &stmt) {
     PregIdx pregIdx = rhsregread->GetRegIdx();
     if (IsSpecialPseudoRegister(pregIdx)) {
       if ((-pregIdx) == kSregRetval0) {
-        CHECK_FATAL(lhsSize <= k16ByteSize, "SelectAggDassign: Incorrect agg size");
-        RegOperand &parm1 = GetOrCreateSpecialRegisterOperand(pregIdx);
-        Operand &memopnd1 = GetOrCreateMemOpnd(*lhsSymbol, 0, k64BitSize);
-        MOperator mop1 = PickStInsn(k64BitSize, PTY_u64);
-        GetCurBB()->AppendInsn(GetCG()->BuildInstruction<AArch64Insn>(mop1, parm1, memopnd1));
-        if (lhsSize > k8ByteSize) {
-          RegOperand &parm2 = GetOrCreatePhysicalRegisterOperand(R1, k64BitSize, kRegTyInt);
-          Operand &memopnd2 = GetOrCreateMemOpnd(*lhsSymbol, k8ByteSize, k64BitSize);
-          MOperator mop2 = PickStInsn(k64BitSize, PTY_u64);
-          GetCurBB()->AppendInsn(GetCG()->BuildInstruction<AArch64Insn>(mop2, parm2, memopnd2));
+        ParmLocator parmlocator(GetBecommon());
+        PLocInfo pLoc;
+        PrimType retPtype;
+        RegType regType;
+        uint32 memSize;
+        uint32 regSize;
+        parmlocator.LocateRetVal(*lhsType, pLoc);
+        AArch64reg r[kFourRegister];
+        r[0] = pLoc.reg0;
+        r[1] = pLoc.reg1;
+        r[2] = pLoc.reg2;
+        r[3] = pLoc.reg3;
+        if (pLoc.numFpPureRegs) {
+          regSize = (pLoc.fpSize == k4ByteSize) ? k32BitSize : k64BitSize;
+          memSize = pLoc.fpSize;
+          retPtype = (pLoc.fpSize == k4ByteSize) ? PTY_f32 : PTY_f64;
+          regType = kRegTyFloat;
+        } else {
+          regSize = k64BitSize;
+          memSize = k8ByteSize;
+          retPtype = PTY_u64;
+          regType = kRegTyInt;
         }
+        for (uint32 i = 0; i < kFourRegister; ++i) {
+          if (r[i] == kRinvalid) {
+            break;
+          }
+          RegOperand &parm = GetOrCreatePhysicalRegisterOperand(r[i], regSize, regType);
+          Operand &mOpnd = GetOrCreateMemOpnd(*lhsSymbol, memSize * i, regSize);
+          GetCurBB()->AppendInsn(GetCG()->BuildInstruction<AArch64Insn>(PickStInsn(regSize, retPtype), parm, mOpnd));
+        }
         isRet = true;
       }
     }
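Note: the rewritten SelectAggDassign walks the r[0..3] return registers and stores each back into the struct at a stride chosen by the classification: fpSize (4 or 8 bytes) for a pure-FP return, 8 bytes for GP pairs. A small standalone sketch of that offset computation (not compiler API):

    #include <cassert>
    #include <cstdint>

    /* Byte offset at which the i-th return register is stored into the struct. */
    uint32_t StoreOffset(bool pureFp, uint32_t fpSize, uint32_t i) {
      uint32_t stride = pureFp ? fpSize : 8;  /* memSize in the hunk above */
      return stride * i;
    }

    int main() {
      assert(StoreOffset(true, 4, 2) == 8);   /* three floats in s0-s2: offsets 0, 4, 8 */
      assert(StoreOffset(false, 8, 1) == 8);  /* 16-byte GP struct in x0/x1: offsets 0, 8 */
      return 0;
    }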
@@ -1227,35 +1247,81 @@ void AArch64CGFunc::SelectAggIassign(IassignNode &stmt, Operand &AddrOpnd) {
     if (stmtType->GetPrimType() == PTY_agg) {
       /* generate move to regs. */
       CHECK_FATAL(lhsSize <= k16ByteSize, "SelectAggIassign: illegal struct size");
+      ParmLocator parmlocator(GetBecommon());
+      PLocInfo pLoc;
+      MIRSymbol *retSt = GetBecommon().GetMIRModule().CurFunction()->GetFormal(0);
+      if (retSt == addrSym) {
+        /* return value */
+        parmlocator.LocateNextParm(*lhsType, pLoc, true);
+      } else {
+        parmlocator.InitPLocInfo(pLoc);
+      }
       /* aggregates are 8 byte aligned. */
       Operand *rhsmemopnd = nullptr;
-      RegOperand *result[kTwoRegister];  /* maximum 16 bytes, 2 registers */
+      RegOperand *result[kFourRegister];  /* up to 2 int regs or 4 fp regs */
+      uint32 loadSize;
+      uint32 numRegs;
+      RegType regType;
+      PrimType retPty;
+      bool fpParm = false;
+      if (pLoc.numFpPureRegs) {
+        loadSize = pLoc.fpSize;
+        numRegs = pLoc.numFpPureRegs;
+        fpParm = true;
+        regType = kRegTyFloat;
+        retPty = (pLoc.fpSize == k4ByteSize) ? PTY_f32 : PTY_f64;
+      } else {
+        loadSize = (lhsSize <= k4ByteSize) ? k4ByteSize : k8ByteSize;
+        numRegs = (lhsSize <= k8ByteSize) ? kOneRegister : kTwoRegister;
+        regType = kRegTyInt;
+        retPty = PTY_u32;
+      }
       bool parmCopy = IsParamStructCopy(*rhsSymbol);
-      uint32 loadSize = (lhsSize <= k4ByteSize) ? k4ByteSize : k8ByteSize;
-      uint32 numRegs = (lhsSize <= k8ByteSize) ? kOneRegister : kTwoRegister;
       for (uint32 i = 0; i < numRegs; i++) {
         if (parmCopy) {
-          rhsmemopnd = &LoadStructCopyBase(*rhsSymbol, rhsOffset + i * k8ByteSize, loadSize * kBitsPerByte);
+          rhsmemopnd = &LoadStructCopyBase(*rhsSymbol, (rhsOffset + i * (fpParm ? loadSize : k8ByteSize)),
+                                           (loadSize * kBitsPerByte));
         } else {
-          rhsmemopnd = &GetOrCreateMemOpnd(*rhsSymbol, rhsOffset + i * k8ByteSize, loadSize * kBitsPerByte);
+          rhsmemopnd = &GetOrCreateMemOpnd(*rhsSymbol, (rhsOffset + i * (fpParm ? loadSize : k8ByteSize)),
+                                           (loadSize * kBitsPerByte));
         }
-        result[i] = &CreateVirtualRegisterOperand(NewVReg(kRegTyInt, loadSize));
-        MOperator mop1 = PickLdInsn(loadSize * kBitsPerByte, PTY_u32);
+        result[i] = &CreateVirtualRegisterOperand(NewVReg(regType, loadSize));
+        MOperator mop1 = PickLdInsn(loadSize * kBitsPerByte, retPty);
         Insn &ld = GetCG()->BuildInstruction<AArch64Insn>(mop1, *(result[i]), *rhsmemopnd);
         GetCurBB()->AppendInsn(ld);
       }
+      AArch64reg regs[kFourRegister];
+      regs[0] = pLoc.reg0;
+      regs[1] = pLoc.reg1;
+      regs[2] = pLoc.reg2;
+      regs[3] = pLoc.reg3;
       for (uint32 i = 0; i < numRegs; i++) {
-        AArch64reg preg = (i == 0 ? R0 : R1);
-        RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, kRegTyInt);
-        MOperator mop2 = (loadSize == k4ByteSize) ? MOP_wmovrr : MOP_xmovrr;
+        AArch64reg preg;
+        MOperator mop2;
+        if (fpParm) {
+          preg = regs[i];
+          mop2 = (loadSize == k4ByteSize) ? MOP_xvmovs : MOP_xvmovd;
+        } else {
+          preg = (i == 0 ? R0 : R1);
+          mop2 = (loadSize == k4ByteSize) ? MOP_wmovrr : MOP_xmovrr;
+        }
+        RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, (loadSize * kBitsPerByte), regType);
         Insn &mov = GetCG()->BuildInstruction<AArch64Insn>(mop2, dest, *(result[i]));
         GetCurBB()->AppendInsn(mov);
       }
       /* Create artificial dependency to extend the live range */
       for (uint32 i = 0; i < numRegs; i++) {
-        AArch64reg preg = (i == 0 ? R0 : R1);
-        RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, kRegTyInt);
-        Insn &pseudo = cg->BuildInstruction<AArch64Insn>(MOP_pseudo_ret_int, dest);
+        AArch64reg preg;
+        MOperator mop3;
+        if (fpParm) {
+          preg = regs[i];
+          mop3 = MOP_pseudo_ret_float;
+        } else {
+          preg = (i == 0 ? R0 : R1);
+          mop3 = MOP_pseudo_ret_int;
+        }
+        RegOperand &dest = GetOrCreatePhysicalRegisterOperand(preg, loadSize * kBitsPerByte, regType);
+        Insn &pseudo = GetCG()->BuildInstruction<AArch64Insn>(mop3, dest);
         GetCurBB()->AppendInsn(pseudo);
       }
       return;
@@ -5005,11 +5071,31 @@ void AArch64CGFunc::SelectParmListDreadSmallAggregate(MIRSymbol &sym, MIRType &s
     CreateCallStructParamPassByStack(symSize, &sym, nullptr, ploc.memOffset);
   } else {
     /* pass by param regs. */
-    MemOperand &mopnd0 = GetOrCreateMemOpnd(sym, 0, k64BitSize);
-    CreateCallStructParamPassByReg(ploc.reg0, mopnd0, srcOpnds);
+    fpParamState state = kStateUnknown;
+    uint32 memSize = 0;
+    if (ploc.fpSize == 0) {
+      state = kNotFp;
+      memSize = k64BitSize;
+    } else if (ploc.fpSize == k4ByteSize) {
+      state = kFp32Bit;
+      memSize = k32BitSize;
+    } else if (ploc.fpSize == k8ByteSize) {
+      state = kFp64Bit;
+      memSize = k64BitSize;
+    }
+    MemOperand &mopnd0 = GetOrCreateMemOpnd(sym, 0, memSize);
+    CreateCallStructParamPassByReg(ploc.reg0, mopnd0, srcOpnds, state);
    if (ploc.reg1) {
-      MemOperand &mopnd1 = GetOrCreateMemOpnd(sym, kSizeOfPtr, k64BitSize);
-      CreateCallStructParamPassByReg(ploc.reg1, mopnd1, srcOpnds);
+      MemOperand &mopnd1 = GetOrCreateMemOpnd(sym, (ploc.fpSize ? ploc.fpSize : kSizeOfPtr), memSize);
+      CreateCallStructParamPassByReg(ploc.reg1, mopnd1, srcOpnds, state);
+    }
+    if (ploc.reg2) {
+      MemOperand &mopnd2 = GetOrCreateMemOpnd(sym, (ploc.fpSize ? (ploc.fpSize * 2) : kSizeOfPtr), memSize);
+      CreateCallStructParamPassByReg(ploc.reg2, mopnd2, srcOpnds, state);
+    }
+    if (ploc.reg3) {
+      MemOperand &mopnd3 = GetOrCreateMemOpnd(sym, (ploc.fpSize ? (ploc.fpSize * 3) : kSizeOfPtr), memSize);
+      CreateCallStructParamPassByReg(ploc.reg3, mopnd3, srcOpnds, state);
     }
   }
 }
@@ -5026,14 +5112,40 @@ void AArch64CGFunc::SelectParmListIreadSmallAggregate(const IreadNode &iread, MI
     CreateCallStructParamPassByStack(symSize, nullptr, addrOpnd1, ploc.memOffset);
   } else {
     /* pass by param regs. */
+    fpParamState state = kStateUnknown;
+    uint32 memSize = 0;
+    switch (ploc.fpSize) {
+      case 0:
+        state = kNotFp;
+        memSize = k64BitSize;
+        break;
+      case k4ByteSize:
+        state = kFp32Bit;
+        memSize = k32BitSize;
+        break;
+      case k8ByteSize:
+        state = kFp64Bit;
+        memSize = k64BitSize;
+        break;
+      default:
+        break;
+    }
     AArch64OfstOperand *offOpnd0 = &GetOrCreateOfstOpnd(0, k32BitSize);
     MemOperand *mopnd =
-        &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, addrOpnd1, nullptr, offOpnd0, nullptr);
-    CreateCallStructParamPassByReg(ploc.reg0, *mopnd, srcOpnds);
+        &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, memSize, addrOpnd1, nullptr, offOpnd0, nullptr);
+    CreateCallStructParamPassByReg(ploc.reg0, *mopnd, srcOpnds, state);
     if (ploc.reg1) {
-      AArch64OfstOperand *offOpnd1 = &GetOrCreateOfstOpnd(kSizeOfPtr, k32BitSize);
-      mopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, k64BitSize, addrOpnd1, nullptr, offOpnd1, nullptr);
-      CreateCallStructParamPassByReg(ploc.reg1, *mopnd, srcOpnds);
+      AArch64OfstOperand *offOpnd1 = &GetOrCreateOfstOpnd((ploc.fpSize ? ploc.fpSize : kSizeOfPtr), k32BitSize);
+      mopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, memSize, addrOpnd1, nullptr, offOpnd1, nullptr);
+      CreateCallStructParamPassByReg(ploc.reg1, *mopnd, srcOpnds, state);
+    }
+    if (ploc.reg2) {
+      AArch64OfstOperand *offOpnd2 = &GetOrCreateOfstOpnd((ploc.fpSize ? (ploc.fpSize * 2) : kSizeOfPtr), k32BitSize);
+      mopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, memSize, addrOpnd1, nullptr, offOpnd2, nullptr);
+      CreateCallStructParamPassByReg(ploc.reg2, *mopnd, srcOpnds, state);
+    }
+    if (ploc.reg3) {
+      AArch64OfstOperand *offOpnd3 = &GetOrCreateOfstOpnd((ploc.fpSize ? (ploc.fpSize * 3) : kSizeOfPtr), k32BitSize);
+      mopnd = &GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOi, memSize, addrOpnd1, nullptr, offOpnd3, nullptr);
+      CreateCallStructParamPassByReg(ploc.reg3, *mopnd, srcOpnds, state);
     }
   }
 }
@@ -5064,9 +5176,12 @@ void AArch64CGFunc::SelectParmListDreadLargeAggregate(MIRSymbol &sym, MIRType &s
   parmLocator.LocateNextParm(structType, ploc);
   uint32 numMemOp = static_cast<uint32>(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr);  /* round up */
   /* Create the struct copies. */
-  AArch64RegOperand *parmOpnd = CreateCallStructParamCopyToStack(numMemOp, &sym, nullptr, structCopyOffset, ploc.reg0);
-  srcOpnds.PushOpnd(*parmOpnd);
+  AArch64RegOperand *parmOpnd = CreateCallStructParamCopyToStack(numMemOp, &sym, nullptr, structCopyOffset, ploc);
+  if (parmOpnd) {
+    srcOpnds.PushOpnd(*parmOpnd);
+  }
   structCopyOffset += (numMemOp * kSizeOfPtr);
 }
 
 void AArch64CGFunc::SelectParmListIreadLargeAggregate(const IreadNode &iread, MIRType &structType,
@@ -5079,9 +5194,11 @@ void AArch64CGFunc::SelectParmListIreadLargeAggregate(const IreadNode &iread, MI
   parmLocator.LocateNextParm(structType, ploc);
   uint32 numMemOp = static_cast<uint32>(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr);  /* round up */
   AArch64RegOperand *parmOpnd =
-      CreateCallStructParamCopyToStack(numMemOp, nullptr, addrOpnd1, structCopyOffset, ploc.reg0);
+      CreateCallStructParamCopyToStack(numMemOp, nullptr, addrOpnd1, structCopyOffset, ploc);
   structCopyOffset += (numMemOp * kSizeOfPtr);
-  srcOpnds.PushOpnd(*parmOpnd);
+  if (parmOpnd) {
+    srcOpnds.PushOpnd(*parmOpnd);
+  }
 }
 
 void AArch64CGFunc::CreateCallStructParamPassByStack(int32 symSize, MIRSymbol *sym,
@@ -5103,10 +5220,30 @@ void AArch64CGFunc::CreateCallStructParamPassByStack(int32 symSize, MIRSymbol *s
   }
 }
 
-void AArch64CGFunc::CreateCallStructParamPassByReg(AArch64reg reg, MemOperand &memOpnd, AArch64ListOperand &srcOpnds) {
-  AArch64RegOperand &parmOpnd = GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyInt);
-  GetCurBB()->AppendInsn(cg->BuildInstruction<AArch64Insn>(PickLdInsn(k64BitSize, PTY_i64), parmOpnd, memOpnd));
-  srcOpnds.PushOpnd(parmOpnd);
+void AArch64CGFunc::CreateCallStructParamPassByReg(AArch64reg reg, MemOperand &memOpnd, AArch64ListOperand &srcOpnds,
+                                                   fpParamState state) {
+  AArch64RegOperand *parmOpnd = nullptr;
+  uint32 dataSizeBits = 0;
+  PrimType pType = PTY_void;
+  if (state == kNotFp) {
+    parmOpnd = &GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyInt);
+    dataSizeBits = GetPrimTypeSize(PTY_i64) * kBitsPerByte;
+    pType = PTY_i64;
+  } else if (state == kFp32Bit) {
+    parmOpnd = &GetOrCreatePhysicalRegisterOperand(reg, k32BitSize, kRegTyFloat);
+    dataSizeBits = GetPrimTypeSize(PTY_f32) * kBitsPerByte;
+    pType = PTY_f32;
+  } else if (state == kFp64Bit) {
+    parmOpnd = &GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyFloat);
+    dataSizeBits = GetPrimTypeSize(PTY_f64) * kBitsPerByte;
+    pType = PTY_f64;
+  } else {
+    ASSERT(0, "CreateCallStructParamPassByReg: unknown state");
+  }
+
+  GetCurBB()->AppendInsn(cg->BuildInstruction<AArch64Insn>(PickLdInsn(dataSizeBits, pType), *parmOpnd, memOpnd));
+  srcOpnds.PushOpnd(*parmOpnd);
 }
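Note: both small-aggregate call paths map PLocInfo::fpSize onto fpParamState the same way: 0 means integer class, 4/8 select single/double FP loads. A standalone model of the mapping (the enumerators mirror those declared in aarch64_cgfunc.h):

    #include <cassert>
    #include <cstdint>

    enum FpParamState { kNotFp, kFp32Bit, kFp64Bit, kStateUnknown };

    FpParamState StateFromFpSize(uint32_t fpSize) {
      switch (fpSize) {
        case 0: return kNotFp;    /* 64-bit integer loads */
        case 4: return kFp32Bit;  /* 32-bit FP loads */
        case 8: return kFp64Bit;  /* 64-bit FP loads */
        default: return kStateUnknown;
      }
    }

    int main() {
      assert(StateFromFpSize(0) == kNotFp);
      assert(StateFromFpSize(4) == kFp32Bit);
      assert(StateFromFpSize(8) == kFp64Bit);
      return 0;
    }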
@@ -5171,7 +5308,7 @@ void AArch64CGFunc::CreateCallStructParamMemcpy(const MIRSymbol *sym, RegOperand
 }
 
 AArch64RegOperand *AArch64CGFunc::CreateCallStructParamCopyToStack(uint32 numMemOp, MIRSymbol *sym, RegOperand *addrOpd,
-                                                                   int32 copyOffset, AArch64reg reg) {
+                                                                   int32 copyOffset, PLocInfo &ploc) {
   /* Create the struct copies. */
   MemOperand *ldMopnd = nullptr;
   MemOperand *stMopnd = nullptr;
@@ -5189,12 +5326,20 @@ AArch64RegOperand *AArch64CGFunc::CreateCallStructParamCopyToStack(uint32 numMem
     GetCurBB()->AppendInsn(cg->BuildInstruction<AArch64Insn>(PickStInsn(k64BitSize, PTY_i64), *vreg, *stMopnd));
   }
   /* Create the copy address parameter for the struct */
-  AArch64RegOperand *parmOpnd = &GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, kRegTyInt);
-  AArch64ImmOperand *offset = &CreateImmOperand(copyOffset, k64BitSize, false);
   RegOperand *fpopnd = &GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt);
-  SelectAdd(*parmOpnd, *fpopnd, *offset, PTY_a64);
-
-  return parmOpnd;
+  AArch64ImmOperand *offset = &CreateImmOperand(copyOffset, k64BitSize, false);
+  if (ploc.reg0 == kRinvalid) {
+    /* No parameter registers left: spill the copy address to its stack slot. */
+    RegOperand &res = CreateRegisterOperandOfType(PTY_u64);
+    SelectAdd(res, *fpopnd, *offset, PTY_u64);
+    MemOperand &stMopnd2 = CreateMemOpnd(RSP, ploc.memOffset, k64BitSize);
+    GetCurBB()->AppendInsn(
+        GetCG()->BuildInstruction<AArch64Insn>(PickStInsn(k64BitSize, PTY_i64), res, stMopnd2));
+    return nullptr;
+  } else {
+    AArch64RegOperand *parmOpnd = &GetOrCreatePhysicalRegisterOperand(ploc.reg0, k64BitSize, kRegTyInt);
+    SelectAdd(*parmOpnd, *fpopnd, *offset, PTY_a64);
+    return parmOpnd;
+  }
 }
 
 void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset,
@@ -5305,7 +5450,6 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, AArch64ListOperand &srcOp
   if ((naryNode.GetOpCode() == OP_icall) || isCallNative) {
     i++;
   }
-  int32 structCopyOffset = GetMaxParamStackSize() - GetStructCopySize();
   for (uint32 pnum = 0; i < naryNode.NumOpnds(); ++i, ++pnum) {
     MIRType *ty = nullptr;
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp
index a6b58aff6fb7e5c91c669a4fa55755a48d3854e5..ed9b96dc9456968a4a4e0aff5dc4f727d85132bb 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp
@@ -150,6 +150,20 @@ void AArch64MemLayout::LayoutVarargParams() {
         nFpRegs++;
       }
     }
+    if (ploc.reg2 != kRinvalid) {
+      if (ploc.reg2 >= R0 && ploc.reg2 <= R7) {
+        nIntRegs++;
+      } else if (ploc.reg2 >= V0 && ploc.reg2 <= V7) {
+        nFpRegs++;
+      }
+    }
+    if (ploc.reg3 != kRinvalid) {
+      if (ploc.reg3 >= R0 && ploc.reg3 <= R7) {
+        nIntRegs++;
+      } else if (ploc.reg3 >= V0 && ploc.reg3 <= V7) {
+        nFpRegs++;
+      }
+    }
   }
   SetSizeOfGRSaveArea((k8BitSize - nIntRegs) * kSizeOfPtr);
   SetSizeOfVRSaveArea((k8BitSize - nFpRegs) * kSizeOfPtr * k2ByteSize);
@@ -169,7 +183,7 @@ void AArch64MemLayout::LayoutFormalParams() {
     AArch64SymbolAlloc *symLoc = memAllocator->GetMemPool()->New<AArch64SymbolAlloc>();
     SetSymAllocInfo(stIndex, *symLoc);
     if (ploc.reg0 != kRinvalid) {  /* register */
-      symLoc->SetRegisters(ploc.reg0, ploc.reg1);
+      symLoc->SetRegisters(ploc.reg0, ploc.reg1, ploc.reg2, ploc.reg3);
       if (mirFunction->GetNthParamAttr(i).GetAttr(ATTR_localrefvar)) {
         symLoc->SetMemSegment(segRefLocals);
         SetSegmentSize(*symLoc, segRefLocals, ptyIdx);
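Note: each named register consumed by fixed parameters (now including reg2/reg3) shrinks the corresponding varargs save area: the GP area keeps the remaining x0-x7 at 8 bytes each, the FP area the remaining v0-v7 at 16 bytes each. A quick model of the arithmetic in LayoutVarargParams:

    #include <cassert>
    #include <cstdint>

    uint32_t GrSaveAreaSize(uint32_t nIntRegs) { return (8 - nIntRegs) * 8; }   /* x0-x7 */
    uint32_t VrSaveAreaSize(uint32_t nFpRegs)  { return (8 - nFpRegs) * 16; }   /* v0-v7 */

    int main() {
      /* e.g. void f(struct {float a, b, c;} s, ...): s occupies v0-v2 */
      assert(GrSaveAreaSize(0) == 64);
      assert(VrSaveAreaSize(3) == 80);
      return 0;
    }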
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp
index 6442c581402c543ba87c8ca1521200a85a18040b..1d2c84df475f3093525527391cba27d632078c6c 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp
@@ -32,7 +32,8 @@ const char *CondOperand::ccStrs[kCcLast] = {
 bool AArch64RegOperand::IsSaveReg(MIRType &type, BECommon &beCommon) const {
   ReturnMechanism retMechanism(type, beCommon);
   if (retMechanism.GetRegCount() > 0) {
-    return GetRegisterNumber() == retMechanism.GetReg0() || GetRegisterNumber() == retMechanism.GetReg1();
+    return GetRegisterNumber() == retMechanism.GetReg0() || GetRegisterNumber() == retMechanism.GetReg1() ||
+           GetRegisterNumber() == retMechanism.GetReg2() || GetRegisterNumber() == retMechanism.GetReg3();
   }
   return false;
 }
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_reaching.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_reaching.cpp
index 4fd22916659e87e3eeec3c6642ff48a07618c4b4..e49ded12a0cc72885675888bad1ed934fb862ec5 100644
--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_reaching.cpp
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_reaching.cpp
@@ -45,9 +45,6 @@ void AArch64ReachingDefinition::InitStartGen() {
       RegType regType = (pLoc.reg0 < V0) ? kRegTyInt : kRegTyFloat;
       uint32 srcBitSize = ((symSize < k4ByteSize) ? k4ByteSize : symSize) * kBitsPerByte;
 
-      AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
-      RegOperand &regOpnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg0, srcBitSize, regType);
-
       MOperator mOp;
       if (regType == kRegTyInt) {
         if (srcBitSize <= k32BitSize) {
@@ -63,13 +60,27 @@ void AArch64ReachingDefinition::InitStartGen() {
         }
       }
 
+      AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
+
+      RegOperand &regOpnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg0, srcBitSize, regType);
       Insn &pseudoInsn = cgFunc->GetCG()->BuildInstruction<AArch64Insn>(mOp, regOpnd);
       bb->InsertInsnBegin(pseudoInsn);
       pseudoInsns.emplace_back(&pseudoInsn);
 
       if (pLoc.reg1) {
-        regOpnd = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg1, srcBitSize, regType);
-        Insn &pseudoInsn1 = cgFunc->GetCG()->BuildInstruction<AArch64Insn>(mOp, regOpnd);
+        RegOperand &regOpnd1 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg1, srcBitSize, regType);
+        Insn &pseudoInsn1 = cgFunc->GetCG()->BuildInstruction<AArch64Insn>(mOp, regOpnd1);
         bb->InsertInsnBegin(pseudoInsn1);
         pseudoInsns.emplace_back(&pseudoInsn1);
       }
+      if (pLoc.reg2) {
+        RegOperand &regOpnd2 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg2, srcBitSize, regType);
+        Insn &pseudoInsn2 = cgFunc->GetCG()->BuildInstruction<AArch64Insn>(mOp, regOpnd2);
+        bb->InsertInsnBegin(pseudoInsn2);
+        pseudoInsns.emplace_back(&pseudoInsn2);
+      }
+      if (pLoc.reg3) {
+        RegOperand &regOpnd3 = aarchCGFunc->GetOrCreatePhysicalRegisterOperand(pLoc.reg3, srcBitSize, regType);
+        Insn &pseudoInsn3 = cgFunc->GetCG()->BuildInstruction<AArch64Insn>(mOp, regOpnd3);
+        bb->InsertInsnBegin(pseudoInsn3);
+        pseudoInsns.emplace_back(&pseudoInsn3);
+      }