mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AArch64][SVE] Spilling/filling of SVE callee-saves.
Implement the spills/fills of callee-saved SVE registers using STR and LDR instructions. Also adds the `aarch64_sve_vector_pcs` attribute to specify the callee-saved registers to be used for functions that return SVE vectors or take SVE vectors as arguments. The callee-saved registers are vector registers z8-z23 and predicate registers p4-p15. The overall frame-layout with SVE will be as follows: +-------------+ | stack args | +-------------+ | Callee Saves| | X29, X30 | |-------------| <- FP | SVE Callee | < ////////////// | saved regs | < ////////////// | z23 | < ////////////// | : | < // SCALABLE // | z8 | < ////////////// | p15 | < /// STACK //// | : | < ////////////// | p4 | < //// AREA //// +-------------+ < ////////////// | : | < ////////////// | SVE locals | < ////////////// | : | < ////////////// +-------------+ |/////////////| alignment gap. | : | | Stack objs | | : | +-------------+ <- SP after call and frame-setup Reviewers: cameron.mcinally, efriedma, greened, thegameg, ostannard, rengolin Reviewed By: ostannard Differential Revision: https://reviews.llvm.org/D68996
This commit is contained in:
parent
6f39af85a4
commit
ca5d75ffa1
@ -594,6 +594,7 @@ lltok::Kind LLLexer::LexIdentifier() {
|
|||||||
KEYWORD(arm_aapcscc);
|
KEYWORD(arm_aapcscc);
|
||||||
KEYWORD(arm_aapcs_vfpcc);
|
KEYWORD(arm_aapcs_vfpcc);
|
||||||
KEYWORD(aarch64_vector_pcs);
|
KEYWORD(aarch64_vector_pcs);
|
||||||
|
KEYWORD(aarch64_sve_vector_pcs);
|
||||||
KEYWORD(msp430_intrcc);
|
KEYWORD(msp430_intrcc);
|
||||||
KEYWORD(avr_intrcc);
|
KEYWORD(avr_intrcc);
|
||||||
KEYWORD(avr_signalcc);
|
KEYWORD(avr_signalcc);
|
||||||
|
@ -1931,6 +1931,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
|
|||||||
/// ::= 'arm_aapcscc'
|
/// ::= 'arm_aapcscc'
|
||||||
/// ::= 'arm_aapcs_vfpcc'
|
/// ::= 'arm_aapcs_vfpcc'
|
||||||
/// ::= 'aarch64_vector_pcs'
|
/// ::= 'aarch64_vector_pcs'
|
||||||
|
/// ::= 'aarch64_sve_vector_pcs'
|
||||||
/// ::= 'msp430_intrcc'
|
/// ::= 'msp430_intrcc'
|
||||||
/// ::= 'avr_intrcc'
|
/// ::= 'avr_intrcc'
|
||||||
/// ::= 'avr_signalcc'
|
/// ::= 'avr_signalcc'
|
||||||
@ -1977,6 +1978,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
|
|||||||
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
|
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
|
||||||
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
|
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
|
||||||
case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
|
case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
|
||||||
|
case lltok::kw_aarch64_sve_vector_pcs:
|
||||||
|
CC = CallingConv::AArch64_SVE_VectorCall;
|
||||||
|
break;
|
||||||
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
|
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
|
||||||
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
|
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
|
||||||
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;
|
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;
|
||||||
|
@ -142,6 +142,7 @@ enum Kind {
|
|||||||
kw_arm_aapcscc,
|
kw_arm_aapcscc,
|
||||||
kw_arm_aapcs_vfpcc,
|
kw_arm_aapcs_vfpcc,
|
||||||
kw_aarch64_vector_pcs,
|
kw_aarch64_vector_pcs,
|
||||||
|
kw_aarch64_sve_vector_pcs,
|
||||||
kw_msp430_intrcc,
|
kw_msp430_intrcc,
|
||||||
kw_avr_intrcc,
|
kw_avr_intrcc,
|
||||||
kw_avr_signalcc,
|
kw_avr_signalcc,
|
||||||
|
@ -364,6 +364,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
|
|||||||
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
|
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
|
||||||
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
|
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
|
||||||
case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
|
case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
|
||||||
|
case CallingConv::AArch64_SVE_VectorCall:
|
||||||
|
Out << "aarch64_sve_vector_pcs";
|
||||||
|
break;
|
||||||
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
|
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
|
||||||
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
|
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
|
||||||
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;
|
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;
|
||||||
|
@ -405,10 +405,10 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
|||||||
|
|
||||||
// Functions taking SVE arguments or returning an SVE type
|
// Functions taking SVE arguments or returning an SVE type
|
||||||
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
|
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
|
||||||
def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
|
||||||
X25, X26, X27, X28, LR, FP,
|
(sequence "P%u", 4, 15),
|
||||||
(sequence "Z%u", 8, 23),
|
X19, X20, X21, X22, X23, X24,
|
||||||
(sequence "P%u", 4, 15))>;
|
X25, X26, X27, X28, LR, FP)>;
|
||||||
|
|
||||||
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
||||||
// 'this' and the pointer return value are both passed in X0 in these cases,
|
// 'this' and the pointer return value are both passed in X0 in these cases,
|
||||||
@ -486,5 +486,7 @@ def CSR_AArch64_RT_MostRegs_SCS
|
|||||||
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
|
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
|
||||||
def CSR_AArch64_AAVPCS_SCS
|
def CSR_AArch64_AAVPCS_SCS
|
||||||
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
|
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
|
||||||
|
def CSR_AArch64_SVE_AAPCS_SCS
|
||||||
|
: CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
|
||||||
def CSR_AArch64_AAPCS_SCS
|
def CSR_AArch64_AAPCS_SCS
|
||||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
|
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
|
||||||
|
@ -834,6 +834,20 @@ static bool isTargetDarwin(const MachineFunction &MF) {
|
|||||||
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
|
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convenience function to determine whether I is an SVE callee save.
|
||||||
|
bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
|
||||||
|
switch (I->getOpcode()) {
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
case AArch64::STR_ZXI:
|
||||||
|
case AArch64::STR_PXI:
|
||||||
|
case AArch64::LDR_ZXI:
|
||||||
|
case AArch64::LDR_PXI:
|
||||||
|
return I->getFlag(MachineInstr::FrameSetup) ||
|
||||||
|
I->getFlag(MachineInstr::FrameDestroy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
||||||
MachineBasicBlock &MBB) const {
|
MachineBasicBlock &MBB) const {
|
||||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||||
@ -965,7 +979,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|||||||
// and pre-inc if we decided to combine the callee-save and local stack
|
// and pre-inc if we decided to combine the callee-save and local stack
|
||||||
// pointer bump above.
|
// pointer bump above.
|
||||||
MachineBasicBlock::iterator End = MBB.end();
|
MachineBasicBlock::iterator End = MBB.end();
|
||||||
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
|
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
|
||||||
|
!IsSVECalleeSave(MBBI)) {
|
||||||
if (CombineSPBump)
|
if (CombineSPBump)
|
||||||
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
|
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
|
||||||
NeedsWinCFI, &HasWinCFI);
|
NeedsWinCFI, &HasWinCFI);
|
||||||
@ -1107,7 +1122,35 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
|||||||
NumBytes = 0;
|
NumBytes = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
|
StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
|
||||||
|
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
|
||||||
|
|
||||||
|
// Process the SVE callee-saves to determine what space needs to be
|
||||||
|
// allocated.
|
||||||
|
if (AFI->getSVECalleeSavedStackSize()) {
|
||||||
|
// Find callee save instructions in frame.
|
||||||
|
CalleeSavesBegin = MBBI;
|
||||||
|
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
|
||||||
|
while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
|
||||||
|
++MBBI;
|
||||||
|
CalleeSavesEnd = MBBI;
|
||||||
|
|
||||||
|
int64_t OffsetToFirstCalleeSaveFromSP =
|
||||||
|
MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
|
||||||
|
StackOffset OffsetToCalleeSavesFromSP =
|
||||||
|
StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
|
||||||
|
AllocateBefore -= OffsetToCalleeSavesFromSP;
|
||||||
|
AllocateAfter = SVEStackSize - AllocateBefore;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate space for the callee saves (if any).
|
||||||
|
emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
|
||||||
|
-AllocateBefore, TII,
|
||||||
|
MachineInstr::FrameSetup);
|
||||||
|
|
||||||
|
// Finally allocate remaining SVE stack space.
|
||||||
|
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
|
||||||
|
-AllocateAfter, TII,
|
||||||
MachineInstr::FrameSetup);
|
MachineInstr::FrameSetup);
|
||||||
|
|
||||||
// Allocate space for the rest of the frame.
|
// Allocate space for the rest of the frame.
|
||||||
@ -1444,7 +1487,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
MachineBasicBlock::iterator Begin = MBB.begin();
|
MachineBasicBlock::iterator Begin = MBB.begin();
|
||||||
while (LastPopI != Begin) {
|
while (LastPopI != Begin) {
|
||||||
--LastPopI;
|
--LastPopI;
|
||||||
if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
|
if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
|
||||||
|
IsSVECalleeSave(LastPopI)) {
|
||||||
++LastPopI;
|
++LastPopI;
|
||||||
break;
|
break;
|
||||||
} else if (CombineSPBump)
|
} else if (CombineSPBump)
|
||||||
@ -1476,11 +1520,53 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
NumBytes -= PrologueSaveSize;
|
NumBytes -= PrologueSaveSize;
|
||||||
assert(NumBytes >= 0 && "Negative stack allocation size!?");
|
assert(NumBytes >= 0 && "Negative stack allocation size!?");
|
||||||
|
|
||||||
|
// Process the SVE callee-saves to determine what space needs to be
|
||||||
|
// deallocated.
|
||||||
|
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
|
||||||
|
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
|
||||||
|
if (AFI->getSVECalleeSavedStackSize()) {
|
||||||
|
RestoreBegin = std::prev(RestoreEnd);;
|
||||||
|
while (IsSVECalleeSave(RestoreBegin) &&
|
||||||
|
RestoreBegin != MBB.begin())
|
||||||
|
--RestoreBegin;
|
||||||
|
++RestoreBegin;
|
||||||
|
|
||||||
|
assert(IsSVECalleeSave(RestoreBegin) &&
|
||||||
|
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
|
||||||
|
|
||||||
|
int64_t OffsetToFirstCalleeSaveFromSP =
|
||||||
|
MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
|
||||||
|
StackOffset OffsetToCalleeSavesFromSP =
|
||||||
|
StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
|
||||||
|
DeallocateBefore = OffsetToCalleeSavesFromSP;
|
||||||
|
DeallocateAfter = SVEStackSize - DeallocateBefore;
|
||||||
|
}
|
||||||
|
|
||||||
// Deallocate the SVE area.
|
// Deallocate the SVE area.
|
||||||
if (SVEStackSize)
|
if (SVEStackSize) {
|
||||||
if (!AFI->isStackRealigned())
|
if (AFI->isStackRealigned()) {
|
||||||
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
|
if (AFI->getSVECalleeSavedStackSize())
|
||||||
TII, MachineInstr::FrameDestroy);
|
// Set SP to start of SVE area, from which the callee-save reloads
|
||||||
|
// can be done. The code below will deallocate the stack space
|
||||||
|
// by moving FP -> SP.
|
||||||
|
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
|
||||||
|
-SVEStackSize, TII, MachineInstr::FrameDestroy);
|
||||||
|
} else {
|
||||||
|
if (AFI->getSVECalleeSavedStackSize()) {
|
||||||
|
// Deallocate the non-SVE locals first before we can deallocate (and
|
||||||
|
// restore callee saves) from the SVE area.
|
||||||
|
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
|
||||||
|
{NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
|
||||||
|
NumBytes = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
|
||||||
|
DeallocateBefore, TII, MachineInstr::FrameDestroy);
|
||||||
|
|
||||||
|
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
|
||||||
|
DeallocateAfter, TII, MachineInstr::FrameDestroy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!hasFP(MF)) {
|
if (!hasFP(MF)) {
|
||||||
bool RedZone = canUseRedZone(MF);
|
bool RedZone = canUseRedZone(MF);
|
||||||
@ -1813,11 +1899,28 @@ struct RegPairInfo {
|
|||||||
unsigned Reg2 = AArch64::NoRegister;
|
unsigned Reg2 = AArch64::NoRegister;
|
||||||
int FrameIdx;
|
int FrameIdx;
|
||||||
int Offset;
|
int Offset;
|
||||||
enum RegType { GPR, FPR64, FPR128 } Type;
|
enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
|
||||||
|
|
||||||
RegPairInfo() = default;
|
RegPairInfo() = default;
|
||||||
|
|
||||||
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
|
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
|
||||||
|
|
||||||
|
unsigned getScale() const {
|
||||||
|
switch (Type) {
|
||||||
|
case PPR:
|
||||||
|
return 2;
|
||||||
|
case GPR:
|
||||||
|
case FPR64:
|
||||||
|
return 8;
|
||||||
|
case ZPR:
|
||||||
|
case FPR128:
|
||||||
|
return 16;
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Unsupported type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isScalable() const { return Type == PPR || Type == ZPR; }
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
@ -1842,7 +1945,8 @@ static void computeCalleeSaveRegisterPairs(
|
|||||||
CC == CallingConv::PreserveMost ||
|
CC == CallingConv::PreserveMost ||
|
||||||
(Count & 1) == 0) &&
|
(Count & 1) == 0) &&
|
||||||
"Odd number of callee-saved regs to spill!");
|
"Odd number of callee-saved regs to spill!");
|
||||||
int Offset = AFI->getCalleeSavedStackSize();
|
int ByteOffset = AFI->getCalleeSavedStackSize();
|
||||||
|
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
|
||||||
// On Linux, we will have either one or zero non-paired register. On Windows
|
// On Linux, we will have either one or zero non-paired register. On Windows
|
||||||
// with CFI, we can have multiple unpaired registers in order to utilize the
|
// with CFI, we can have multiple unpaired registers in order to utilize the
|
||||||
// available unwind codes. This flag assures that the alignment fixup is done
|
// available unwind codes. This flag assures that the alignment fixup is done
|
||||||
@ -1858,6 +1962,10 @@ static void computeCalleeSaveRegisterPairs(
|
|||||||
RPI.Type = RegPairInfo::FPR64;
|
RPI.Type = RegPairInfo::FPR64;
|
||||||
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
|
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
|
||||||
RPI.Type = RegPairInfo::FPR128;
|
RPI.Type = RegPairInfo::FPR128;
|
||||||
|
else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
|
||||||
|
RPI.Type = RegPairInfo::ZPR;
|
||||||
|
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
|
||||||
|
RPI.Type = RegPairInfo::PPR;
|
||||||
else
|
else
|
||||||
llvm_unreachable("Unsupported register class.");
|
llvm_unreachable("Unsupported register class.");
|
||||||
|
|
||||||
@ -1880,6 +1988,9 @@ static void computeCalleeSaveRegisterPairs(
|
|||||||
if (AArch64::FPR128RegClass.contains(NextReg))
|
if (AArch64::FPR128RegClass.contains(NextReg))
|
||||||
RPI.Reg2 = NextReg;
|
RPI.Reg2 = NextReg;
|
||||||
break;
|
break;
|
||||||
|
case RegPairInfo::PPR:
|
||||||
|
case RegPairInfo::ZPR:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1917,23 +2028,33 @@ static void computeCalleeSaveRegisterPairs(
|
|||||||
|
|
||||||
RPI.FrameIdx = CSI[i].getFrameIdx();
|
RPI.FrameIdx = CSI[i].getFrameIdx();
|
||||||
|
|
||||||
int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
|
int Scale = RPI.getScale();
|
||||||
Offset -= RPI.isPaired() ? 2 * Scale : Scale;
|
if (RPI.isScalable())
|
||||||
|
ScalableByteOffset -= Scale;
|
||||||
|
else
|
||||||
|
ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale;
|
||||||
|
|
||||||
|
assert(!(RPI.isScalable() && RPI.isPaired()) &&
|
||||||
|
"Paired spill/fill instructions don't exist for SVE vectors");
|
||||||
|
|
||||||
// Round up size of non-pair to pair size if we need to pad the
|
// Round up size of non-pair to pair size if we need to pad the
|
||||||
// callee-save area to ensure 16-byte alignment.
|
// callee-save area to ensure 16-byte alignment.
|
||||||
if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
|
if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
|
||||||
RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
|
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
|
||||||
|
!RPI.isPaired()) {
|
||||||
FixupDone = true;
|
FixupDone = true;
|
||||||
Offset -= 8;
|
ByteOffset -= 8;
|
||||||
assert(Offset % 16 == 0);
|
assert(ByteOffset % 16 == 0);
|
||||||
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
|
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
|
||||||
MFI.setObjectAlignment(RPI.FrameIdx, 16);
|
MFI.setObjectAlignment(RPI.FrameIdx, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
|
||||||
assert(Offset % Scale == 0);
|
assert(Offset % Scale == 0);
|
||||||
RPI.Offset = Offset / Scale;
|
RPI.Offset = Offset / Scale;
|
||||||
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
|
|
||||||
|
assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
|
||||||
|
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
|
||||||
"Offset out of bounds for LDP/STP immediate");
|
"Offset out of bounds for LDP/STP immediate");
|
||||||
|
|
||||||
RegPairs.push_back(RPI);
|
RegPairs.push_back(RPI);
|
||||||
@ -2025,6 +2146,16 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
|
|||||||
Size = 16;
|
Size = 16;
|
||||||
Align = 16;
|
Align = 16;
|
||||||
break;
|
break;
|
||||||
|
case RegPairInfo::ZPR:
|
||||||
|
StrOpc = AArch64::STR_ZXI;
|
||||||
|
Size = 16;
|
||||||
|
Align = 16;
|
||||||
|
break;
|
||||||
|
case RegPairInfo::PPR:
|
||||||
|
StrOpc = AArch64::STR_PXI;
|
||||||
|
Size = 2;
|
||||||
|
Align = 2;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
|
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
|
||||||
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
|
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
|
||||||
@ -2065,6 +2196,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
|
|||||||
if (NeedsWinCFI)
|
if (NeedsWinCFI)
|
||||||
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
|
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
|
||||||
|
|
||||||
|
// Update the StackIDs of the SVE stack slots.
|
||||||
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
|
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
|
||||||
|
MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector);
|
||||||
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -2116,6 +2252,16 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
|
|||||||
Size = 16;
|
Size = 16;
|
||||||
Align = 16;
|
Align = 16;
|
||||||
break;
|
break;
|
||||||
|
case RegPairInfo::ZPR:
|
||||||
|
LdrOpc = AArch64::LDR_ZXI;
|
||||||
|
Size = 16;
|
||||||
|
Align = 16;
|
||||||
|
break;
|
||||||
|
case RegPairInfo::PPR:
|
||||||
|
LdrOpc = AArch64::LDR_PXI;
|
||||||
|
Size = 2;
|
||||||
|
Align = 2;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
|
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
|
||||||
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
|
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
|
||||||
@ -2150,12 +2296,20 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
|
|||||||
if (NeedsWinCFI)
|
if (NeedsWinCFI)
|
||||||
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
|
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
|
||||||
};
|
};
|
||||||
if (ReverseCSRRestoreSeq)
|
|
||||||
|
// SVE objects are always restored in reverse order.
|
||||||
|
for (const RegPairInfo &RPI : reverse(RegPairs))
|
||||||
|
if (RPI.isScalable())
|
||||||
|
EmitMI(RPI);
|
||||||
|
|
||||||
|
if (ReverseCSRRestoreSeq) {
|
||||||
for (const RegPairInfo &RPI : reverse(RegPairs))
|
for (const RegPairInfo &RPI : reverse(RegPairs))
|
||||||
EmitMI(RPI);
|
if (!RPI.isScalable())
|
||||||
else
|
EmitMI(RPI);
|
||||||
|
} else
|
||||||
for (const RegPairInfo &RPI : RegPairs)
|
for (const RegPairInfo &RPI : RegPairs)
|
||||||
EmitMI(RPI);
|
if (!RPI.isScalable())
|
||||||
|
EmitMI(RPI);
|
||||||
|
|
||||||
if (NeedShadowCallStackProlog) {
|
if (NeedShadowCallStackProlog) {
|
||||||
// Shadow call stack epilog: ldr x30, [x18, #-8]!
|
// Shadow call stack epilog: ldr x30, [x18, #-8]!
|
||||||
@ -2202,7 +2356,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||||||
SavedRegs.set(Reg);
|
SavedRegs.set(Reg);
|
||||||
|
|
||||||
bool RegUsed = SavedRegs.test(Reg);
|
bool RegUsed = SavedRegs.test(Reg);
|
||||||
unsigned PairedReg = CSRegs[i ^ 1];
|
unsigned PairedReg = AArch64::NoRegister;
|
||||||
|
if (AArch64::GPR64RegClass.contains(Reg) ||
|
||||||
|
AArch64::FPR64RegClass.contains(Reg) ||
|
||||||
|
AArch64::FPR128RegClass.contains(Reg))
|
||||||
|
PairedReg = CSRegs[i ^ 1];
|
||||||
|
|
||||||
if (!RegUsed) {
|
if (!RegUsed) {
|
||||||
if (AArch64::GPR64RegClass.contains(Reg) &&
|
if (AArch64::GPR64RegClass.contains(Reg) &&
|
||||||
!RegInfo->isReservedReg(MF, Reg)) {
|
!RegInfo->isReservedReg(MF, Reg)) {
|
||||||
@ -2226,10 +2385,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||||||
|
|
||||||
// Calculates the callee saved stack size.
|
// Calculates the callee saved stack size.
|
||||||
unsigned CSStackSize = 0;
|
unsigned CSStackSize = 0;
|
||||||
|
unsigned SVECSStackSize = 0;
|
||||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||||
for (unsigned Reg : SavedRegs.set_bits())
|
for (unsigned Reg : SavedRegs.set_bits()) {
|
||||||
CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
|
auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
|
||||||
|
if (AArch64::PPRRegClass.contains(Reg) ||
|
||||||
|
AArch64::ZPRRegClass.contains(Reg))
|
||||||
|
SVECSStackSize += RegSize;
|
||||||
|
else
|
||||||
|
CSStackSize += RegSize;
|
||||||
|
}
|
||||||
|
|
||||||
// Save number of saved regs, so we can easily update CSStackSize later.
|
// Save number of saved regs, so we can easily update CSStackSize later.
|
||||||
unsigned NumSavedRegs = SavedRegs.count();
|
unsigned NumSavedRegs = SavedRegs.count();
|
||||||
@ -2249,10 +2415,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||||||
dbgs() << "\n";);
|
dbgs() << "\n";);
|
||||||
|
|
||||||
// If any callee-saved registers are used, the frame cannot be eliminated.
|
// If any callee-saved registers are used, the frame cannot be eliminated.
|
||||||
unsigned MaxAlign = getStackAlignment();
|
|
||||||
int64_t SVEStackSize =
|
int64_t SVEStackSize =
|
||||||
alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
|
alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
|
||||||
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
|
|
||||||
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
|
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
|
||||||
|
|
||||||
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
||||||
@ -2313,6 +2477,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||||||
// instructions.
|
// instructions.
|
||||||
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
|
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
|
||||||
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
|
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
|
||||||
|
AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AArch64FrameLowering::enableStackSlotScavenging(
|
bool AArch64FrameLowering::enableStackSlotScavenging(
|
||||||
@ -2321,9 +2486,39 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
|
|||||||
return AFI->hasCalleeSaveStackFreeSpace();
|
return AFI->hasCalleeSaveStackFreeSpace();
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
|
/// Returns true if there are any SVE callee saves.
|
||||||
unsigned &MaxAlign) const {
|
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
|
||||||
// Process all fixed stack objects.
|
int &Min, int &Max) {
|
||||||
|
if (!MFI.isCalleeSavedInfoValid())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Min = std::numeric_limits<int>::max();
|
||||||
|
Max = std::numeric_limits<int>::min();
|
||||||
|
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
|
||||||
|
for (auto &CS : CSI) {
|
||||||
|
if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
|
||||||
|
AArch64::PPRRegClass.contains(CS.getReg())) {
|
||||||
|
assert((Max == std::numeric_limits<int>::min() ||
|
||||||
|
Max + 1 == CS.getFrameIdx()) &&
|
||||||
|
"SVE CalleeSaves are not consecutive");
|
||||||
|
|
||||||
|
Min = std::min(Min, CS.getFrameIdx());
|
||||||
|
Max = std::max(Max, CS.getFrameIdx());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Min != std::numeric_limits<int>::max();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process all the SVE stack objects and determine offsets for each
|
||||||
|
// object. If AssignOffsets is true, the offsets get assigned.
|
||||||
|
// Fills in the first and last callee-saved frame indices into
|
||||||
|
// Min/MaxCSFrameIndex, respectively.
|
||||||
|
// Returns the size of the stack.
|
||||||
|
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
|
||||||
|
int &MinCSFrameIndex,
|
||||||
|
int &MaxCSFrameIndex,
|
||||||
|
bool AssignOffsets) {
|
||||||
|
// First process all fixed stack objects.
|
||||||
int64_t Offset = 0;
|
int64_t Offset = 0;
|
||||||
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
|
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
|
||||||
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
|
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
|
||||||
@ -2332,12 +2527,41 @@ int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
|
|||||||
Offset = FixedOffset;
|
Offset = FixedOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Then process all callee saved slots.
|
||||||
|
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
|
||||||
|
// Make sure to align the last callee save slot.
|
||||||
|
MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
|
||||||
|
|
||||||
|
// Assign offsets to the callee save slots.
|
||||||
|
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
|
||||||
|
Offset += MFI.getObjectSize(I);
|
||||||
|
Offset = alignTo(Offset, MFI.getObjectAlignment(I));
|
||||||
|
if (AssignOffsets) {
|
||||||
|
LLVM_DEBUG(dbgs() << "alloc FI(" << I << ") at SP[" << Offset
|
||||||
|
<< "]\n");
|
||||||
|
MFI.setObjectOffset(I, -Offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Note: We don't take allocatable stack objects into
|
// Note: We don't take allocatable stack objects into
|
||||||
// account yet, because allocation for those is not yet
|
// account yet, because allocation for those is not yet
|
||||||
// implemented.
|
// implemented.
|
||||||
return Offset;
|
return Offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
|
||||||
|
MachineFrameInfo &MFI) const {
|
||||||
|
int MinCSFrameIndex, MaxCSFrameIndex;
|
||||||
|
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
|
||||||
|
MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
|
||||||
|
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
MachineFunction &MF, RegScavenger *RS) const {
|
MachineFunction &MF, RegScavenger *RS) const {
|
||||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
@ -2345,12 +2569,13 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
|||||||
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
|
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
|
||||||
"Upwards growing stack unsupported");
|
"Upwards growing stack unsupported");
|
||||||
|
|
||||||
unsigned MaxAlign = getStackAlignment();
|
int MinCSFrameIndex, MaxCSFrameIndex;
|
||||||
int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
|
int64_t SVEStackSize =
|
||||||
|
assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
|
||||||
|
|
||||||
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||||
AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
|
AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
|
||||||
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
|
AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
|
||||||
|
|
||||||
// If this function isn't doing Win64-style C++ EH, we don't need to do
|
// If this function isn't doing Win64-style C++ EH, we don't need to do
|
||||||
// anything.
|
// anything.
|
||||||
|
@ -101,7 +101,11 @@ public:
|
|||||||
private:
|
private:
|
||||||
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
|
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
|
||||||
unsigned StackBumpBytes) const;
|
unsigned StackBumpBytes) const;
|
||||||
int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
|
|
||||||
|
int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
|
||||||
|
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
|
||||||
|
int &MinCSFrameIndex,
|
||||||
|
int &MaxCSFrameIndex) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
@ -3118,6 +3118,9 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
|
|||||||
switch (CC) {
|
switch (CC) {
|
||||||
default:
|
default:
|
||||||
report_fatal_error("Unsupported calling convention.");
|
report_fatal_error("Unsupported calling convention.");
|
||||||
|
case CallingConv::AArch64_SVE_VectorCall:
|
||||||
|
// Calling SVE functions is not yet supported.
|
||||||
|
report_fatal_error("Unsupported calling convention.");
|
||||||
case CallingConv::WebKit_JS:
|
case CallingConv::WebKit_JS:
|
||||||
return CC_AArch64_WebKit_JS;
|
return CC_AArch64_WebKit_JS;
|
||||||
case CallingConv::GHC:
|
case CallingConv::GHC:
|
||||||
|
@ -53,8 +53,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
|
|||||||
/// Amount of stack frame size, not including callee-saved registers.
|
/// Amount of stack frame size, not including callee-saved registers.
|
||||||
unsigned LocalStackSize;
|
unsigned LocalStackSize;
|
||||||
|
|
||||||
|
/// The start and end frame indices for the SVE callee saves.
|
||||||
|
int MinSVECSFrameIndex;
|
||||||
|
int MaxSVECSFrameIndex;
|
||||||
|
|
||||||
/// Amount of stack frame size used for saving callee-saved registers.
|
/// Amount of stack frame size used for saving callee-saved registers.
|
||||||
unsigned CalleeSavedStackSize;
|
unsigned CalleeSavedStackSize;
|
||||||
|
unsigned SVECalleeSavedStackSize;
|
||||||
bool HasCalleeSavedStackSize = false;
|
bool HasCalleeSavedStackSize = false;
|
||||||
|
|
||||||
/// Number of TLS accesses using the special (combinable)
|
/// Number of TLS accesses using the special (combinable)
|
||||||
@ -161,7 +166,6 @@ public:
|
|||||||
void setCalleeSaveStackHasFreeSpace(bool s) {
|
void setCalleeSaveStackHasFreeSpace(bool s) {
|
||||||
CalleeSaveStackHasFreeSpace = s;
|
CalleeSaveStackHasFreeSpace = s;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isSplitCSR() const { return IsSplitCSR; }
|
bool isSplitCSR() const { return IsSplitCSR; }
|
||||||
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
|
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
|
||||||
|
|
||||||
@ -218,6 +222,22 @@ public:
|
|||||||
return CalleeSavedStackSize;
|
return CalleeSavedStackSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Records the callee-saved stack size for SVE vectors, in 'scalable bytes'.
|
||||||
|
void setSVECalleeSavedStackSize(unsigned Size) {
|
||||||
|
SVECalleeSavedStackSize = Size;
|
||||||
|
}
|
||||||
|
unsigned getSVECalleeSavedStackSize() const {
|
||||||
|
return SVECalleeSavedStackSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setMinMaxSVECSFrameIndex(int Min, int Max) {
|
||||||
|
MinSVECSFrameIndex = Min;
|
||||||
|
MaxSVECSFrameIndex = Max;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
|
||||||
|
int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
|
||||||
|
|
||||||
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
|
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
|
||||||
unsigned getNumLocalDynamicTLSAccesses() const {
|
unsigned getNumLocalDynamicTLSAccesses() const {
|
||||||
return NumLocalDynamicTLSAccesses;
|
return NumLocalDynamicTLSAccesses;
|
||||||
|
@ -55,6 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||||||
return CSR_AArch64_AllRegs_SaveList;
|
return CSR_AArch64_AllRegs_SaveList;
|
||||||
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
|
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
|
||||||
return CSR_AArch64_AAVPCS_SaveList;
|
return CSR_AArch64_AAVPCS_SaveList;
|
||||||
|
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
|
||||||
|
return CSR_AArch64_SVE_AAPCS_SaveList;
|
||||||
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
|
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
|
||||||
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
|
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
|
||||||
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
|
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
|
||||||
@ -125,7 +127,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
|||||||
if (CC == CallingConv::AArch64_VectorCall)
|
if (CC == CallingConv::AArch64_VectorCall)
|
||||||
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
|
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
|
||||||
if (CC == CallingConv::AArch64_SVE_VectorCall)
|
if (CC == CallingConv::AArch64_SVE_VectorCall)
|
||||||
return CSR_AArch64_SVE_AAPCS_RegMask;
|
return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
|
||||||
|
: CSR_AArch64_SVE_AAPCS_RegMask;
|
||||||
if (CC == CallingConv::CFGuard_Check)
|
if (CC == CallingConv::CFGuard_Check)
|
||||||
return CSR_Win_AArch64_CFGuard_Check_RegMask;
|
return CSR_Win_AArch64_CFGuard_Check_RegMask;
|
||||||
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
|
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
|
||||||
|
@ -30,6 +30,10 @@
|
|||||||
define void @test_address_sve_fp() nounwind { entry: unreachable }
|
define void @test_address_sve_fp() nounwind { entry: unreachable }
|
||||||
define void @test_stack_arg_sve() nounwind { entry: unreachable }
|
define void @test_stack_arg_sve() nounwind { entry: unreachable }
|
||||||
define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
|
define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
|
||||||
|
define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable }
|
||||||
|
define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable }
|
||||||
|
define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable }
|
||||||
|
define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable }
|
||||||
|
|
||||||
...
|
...
|
||||||
# +----------+
|
# +----------+
|
||||||
@ -328,3 +332,183 @@ body: |
|
|||||||
|
|
||||||
RET_ReallyLR
|
RET_ReallyLR
|
||||||
---
|
---
|
||||||
|
...
|
||||||
|
# CHECK-LABEL: name: save_restore_pregs_sve
|
||||||
|
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p6, $sp, 5
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p5, $sp, 6
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p4, $sp, 7
|
||||||
|
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
|
||||||
|
|
||||||
|
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
|
||||||
|
# CHECK: $p6 = frame-destroy LDR_PXI $sp, 5
|
||||||
|
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 6
|
||||||
|
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 7
|
||||||
|
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1
|
||||||
|
# CHECK: RET_ReallyLR
|
||||||
|
name: save_restore_pregs_sve
|
||||||
|
stack:
|
||||||
|
- { id: 0, stack-id: default, size: 32, alignment: 16 }
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
|
||||||
|
$p4 = IMPLICIT_DEF
|
||||||
|
$p5 = IMPLICIT_DEF
|
||||||
|
$p6 = IMPLICIT_DEF
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
---
|
||||||
|
...
|
||||||
|
# CHECK-LABEL: name: save_restore_zregs_sve
|
||||||
|
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -3
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z10, $sp, 0
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z9, $sp, 1
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z8, $sp, 2
|
||||||
|
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
|
||||||
|
|
||||||
|
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
|
||||||
|
# CHECK: $z10 = frame-destroy LDR_ZXI $sp, 0
|
||||||
|
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 1
|
||||||
|
# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 2
|
||||||
|
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 3
|
||||||
|
# CHECK: RET_ReallyLR
|
||||||
|
name: save_restore_zregs_sve
|
||||||
|
stack:
|
||||||
|
- { id: 0, stack-id: default, size: 32, alignment: 16 }
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
|
||||||
|
$z8 = IMPLICIT_DEF
|
||||||
|
$z9 = IMPLICIT_DEF
|
||||||
|
$z10 = IMPLICIT_DEF
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
---
|
||||||
|
...
|
||||||
|
# Test allocation/deallocation of the stack frame together with the
|
||||||
|
# saving/restoring of callee save registers. Fixed-stack objects
|
||||||
|
# are allocated before the callee-saves.
|
||||||
|
# This also adds some non-SVE callee-saves, to ensure that those are
|
||||||
|
# paired correctly.
|
||||||
|
#
|
||||||
|
# CHECK-LABEL: name: save_restore_sve
|
||||||
|
# CHECK: $sp = frame-setup STPXpre killed ${{[a-z0-9]+}}, killed $x21, $sp, -4
|
||||||
|
# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 2
|
||||||
|
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -19
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p15, $sp, 4
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p14, $sp, 5
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p5, $sp, 14
|
||||||
|
# CHECK: frame-setup STR_PXI killed $p4, $sp, 15
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z23, $sp, 2
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z22, $sp, 3
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z9, $sp, 16
|
||||||
|
# CHECK: frame-setup STR_ZXI killed $z8, $sp, 17
|
||||||
|
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
|
||||||
|
|
||||||
|
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
|
||||||
|
# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
|
||||||
|
# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
|
||||||
|
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
|
||||||
|
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
|
||||||
|
# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
|
||||||
|
# CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3
|
||||||
|
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
|
||||||
|
# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17
|
||||||
|
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 19
|
||||||
|
# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2
|
||||||
|
# CHECK: $sp, ${{[a-z0-9]+}}, $x21 = frame-destroy LDPXpost $sp, 4
|
||||||
|
# CHECK: RET_ReallyLR
|
||||||
|
name: save_restore_sve
|
||||||
|
fixedStack:
|
||||||
|
- { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
|
||||||
|
stack:
|
||||||
|
- { id: 0, stack-id: default, size: 32, alignment: 16 }
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
|
||||||
|
$z8_z9_z10_z11 = IMPLICIT_DEF
|
||||||
|
$z12_z13_z14_z15 = IMPLICIT_DEF
|
||||||
|
$z16_z17_z18_z19 = IMPLICIT_DEF
|
||||||
|
$z20_z21_z22_z23 = IMPLICIT_DEF
|
||||||
|
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||||
|
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||||
|
$p4 = IMPLICIT_DEF
|
||||||
|
$p5 = IMPLICIT_DEF
|
||||||
|
$p6 = IMPLICIT_DEF
|
||||||
|
$p7 = IMPLICIT_DEF
|
||||||
|
$p8 = IMPLICIT_DEF
|
||||||
|
$p9 = IMPLICIT_DEF
|
||||||
|
$p10 = IMPLICIT_DEF
|
||||||
|
$p11 = IMPLICIT_DEF
|
||||||
|
$p12 = IMPLICIT_DEF
|
||||||
|
$p13 = IMPLICIT_DEF
|
||||||
|
$p14 = IMPLICIT_DEF
|
||||||
|
$p15 = IMPLICIT_DEF
|
||||||
|
|
||||||
|
$x19 = IMPLICIT_DEF
|
||||||
|
$x20 = IMPLICIT_DEF
|
||||||
|
$x21 = IMPLICIT_DEF
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
---
|
||||||
|
...
|
||||||
|
# Test allocation/deallocation of the stack frame together with the
|
||||||
|
# saving/restoring of callee save registers. Fixed-stack objects
|
||||||
|
# are allocated before the callee-saves.
|
||||||
|
#
|
||||||
|
# CHECK-LABEL: name: save_restore_sve_realign
|
||||||
|
# CHECK: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
|
||||||
|
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
|
||||||
|
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -19
|
||||||
|
# CHECK-NEXT: STR_PXI killed $p15, $sp, 4
|
||||||
|
# CHECK-NEXT: STR_PXI killed $p14, $sp, 5
|
||||||
|
# CHECK: STR_PXI killed $p5, $sp, 14
|
||||||
|
# CHECK-NEXT: STR_PXI killed $p4, $sp, 15
|
||||||
|
# CHECK-NEXT: STR_ZXI killed $z23, $sp, 2
|
||||||
|
# CHECK-NEXT: STR_ZXI killed $z22, $sp, 3
|
||||||
|
# CHECK: STR_ZXI killed $z9, $sp, 16
|
||||||
|
# CHECK-NEXT: STR_ZXI killed $z8, $sp, 17
|
||||||
|
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
|
||||||
|
# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
|
||||||
|
|
||||||
|
# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -19
|
||||||
|
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
|
||||||
|
# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
|
||||||
|
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
|
||||||
|
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15
|
||||||
|
# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2
|
||||||
|
# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
|
||||||
|
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
|
||||||
|
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
|
||||||
|
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
|
||||||
|
# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
|
||||||
|
# CHECK-NEXT: RET_ReallyLR
|
||||||
|
name: save_restore_sve_realign
|
||||||
|
fixedStack:
|
||||||
|
- { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
|
||||||
|
stack:
|
||||||
|
- { id: 0, stack-id: default, size: 16, alignment: 32 }
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
|
||||||
|
$z8_z9_z10_z11 = IMPLICIT_DEF
|
||||||
|
$z12_z13_z14_z15 = IMPLICIT_DEF
|
||||||
|
$z16_z17_z18_z19 = IMPLICIT_DEF
|
||||||
|
$z20_z21_z22_z23 = IMPLICIT_DEF
|
||||||
|
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||||
|
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||||
|
$p4 = IMPLICIT_DEF
|
||||||
|
$p5 = IMPLICIT_DEF
|
||||||
|
$p6 = IMPLICIT_DEF
|
||||||
|
$p7 = IMPLICIT_DEF
|
||||||
|
$p8 = IMPLICIT_DEF
|
||||||
|
$p9 = IMPLICIT_DEF
|
||||||
|
$p10 = IMPLICIT_DEF
|
||||||
|
$p11 = IMPLICIT_DEF
|
||||||
|
$p12 = IMPLICIT_DEF
|
||||||
|
$p13 = IMPLICIT_DEF
|
||||||
|
$p14 = IMPLICIT_DEF
|
||||||
|
$p15 = IMPLICIT_DEF
|
||||||
|
|
||||||
|
RET_ReallyLR
|
||||||
|
---
|
||||||
|
Loading…
x
Reference in New Issue
Block a user