1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[AArch64][SVE] Spilling/filling of SVE callee-saves.

Implement the spills/fills of callee-saved SVE registers using STR and LDR
instructions.

Also adds the `aarch64_sve_vector_pcs` attribute to specify the
callee-saved registers to be used for functions that return SVE vectors or
take SVE vectors as arguments. The callee-saved registers are vector
registers z8-z23 and predicate registers p4-p15.

The overal frame-layout with SVE will be as follows:

   +-------------+
   | stack args  |
   +-------------+
   | Callee Saves|
   |   X29, X30  |
   |-------------| <- FP
   | SVE Callee  | < //////////////
   | saved regs  | < //////////////
   |    z23      | < //////////////
   |     :       | < // SCALABLE //
   |    z8       | < //////////////
   |    p15      | < /// STACK ////
   |     :       | < //////////////
   |    p4       | < //// AREA ////
   +-------------+ < //////////////
   |     :       | < //////////////
   |  SVE locals | < //////////////
   |     :       | < //////////////
   +-------------+
   |/////////////| alignment gap.
   |     :       |
   | Stack objs  |
   |     :       |
   +-------------+ <- SP after call and frame-setup

Reviewers: cameron.mcinally, efriedma, greened, thegameg, ostannard, rengolin

Reviewed By: ostannard

Differential Revision: https://reviews.llvm.org/D68996
This commit is contained in:
Sander de Smalen 2019-11-05 16:54:54 +00:00
parent 6f39af85a4
commit ca5d75ffa1
11 changed files with 489 additions and 39 deletions

View File

@ -594,6 +594,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
KEYWORD(aarch64_vector_pcs);
KEYWORD(aarch64_sve_vector_pcs);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
KEYWORD(avr_signalcc);

View File

@ -1931,6 +1931,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
/// ::= 'aarch64_vector_pcs'
/// ::= 'aarch64_sve_vector_pcs'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
/// ::= 'avr_signalcc'
@ -1977,6 +1978,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
case lltok::kw_aarch64_sve_vector_pcs:
CC = CallingConv::AArch64_SVE_VectorCall;
break;
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;

View File

@ -142,6 +142,7 @@ enum Kind {
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
kw_aarch64_vector_pcs,
kw_aarch64_sve_vector_pcs,
kw_msp430_intrcc,
kw_avr_intrcc,
kw_avr_signalcc,

View File

@ -364,6 +364,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
case CallingConv::AArch64_SVE_VectorCall:
Out << "aarch64_sve_vector_pcs";
break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;

View File

@ -405,10 +405,10 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
// Functions taking SVE arguments or returning an SVE type
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
X25, X26, X27, X28, LR, FP,
(sequence "Z%u", 8, 23),
(sequence "P%u", 4, 15))>;
def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
(sequence "P%u", 4, 15),
X19, X20, X21, X22, X23, X24,
X25, X26, X27, X28, LR, FP)>;
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
@ -486,5 +486,7 @@ def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
def CSR_AArch64_AAVPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
def CSR_AArch64_SVE_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;

View File

@ -834,6 +834,20 @@ static bool isTargetDarwin(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}
// Convenience function to determine whether I is an SVE callee save.
bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
case AArch64::STR_ZXI:
case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_PXI:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@ -965,7 +979,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
@ -1107,7 +1122,35 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = 0;
}
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
if (AFI->getSVECalleeSavedStackSize()) {
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
CalleeSavesEnd = MBBI;
int64_t OffsetToFirstCalleeSaveFromSP =
MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
StackOffset OffsetToCalleeSavesFromSP =
StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
AllocateBefore -= OffsetToCalleeSavesFromSP;
AllocateAfter = SVEStackSize - AllocateBefore;
}
// Allocate space for the callee saves (if any).
emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
-AllocateBefore, TII,
MachineInstr::FrameSetup);
// Finally allocate remaining SVE stack space.
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-AllocateAfter, TII,
MachineInstr::FrameSetup);
// Allocate space for the rest of the frame.
@ -1444,7 +1487,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
IsSVECalleeSave(LastPopI)) {
++LastPopI;
break;
} else if (CombineSPBump)
@ -1476,11 +1520,53 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
// Process the SVE callee-saves to determine what space needs to be
// deallocated.
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
if (AFI->getSVECalleeSavedStackSize()) {
RestoreBegin = std::prev(RestoreEnd);;
while (IsSVECalleeSave(RestoreBegin) &&
RestoreBegin != MBB.begin())
--RestoreBegin;
++RestoreBegin;
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
int64_t OffsetToFirstCalleeSaveFromSP =
MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
StackOffset OffsetToCalleeSavesFromSP =
StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
DeallocateBefore = OffsetToCalleeSavesFromSP;
DeallocateAfter = SVEStackSize - DeallocateBefore;
}
// Deallocate the SVE area.
if (SVEStackSize)
if (!AFI->isStackRealigned())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
TII, MachineInstr::FrameDestroy);
if (SVEStackSize) {
if (AFI->isStackRealigned()) {
if (AFI->getSVECalleeSavedStackSize())
// Set SP to start of SVE area, from which the callee-save reloads
// can be done. The code below will deallocate the stack space
// space by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-SVEStackSize, TII, MachineInstr::FrameDestroy);
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
{NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
NumBytes = 0;
}
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
DeallocateBefore, TII, MachineInstr::FrameDestroy);
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy);
}
}
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
@ -1813,11 +1899,28 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
enum RegType { GPR, FPR64, FPR128 } Type;
enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
RegPairInfo() = default;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
unsigned getScale() const {
switch (Type) {
case PPR:
return 2;
case GPR:
case FPR64:
return 8;
case ZPR:
case FPR128:
return 16;
default:
llvm_unreachable("Unsupported type");
}
}
bool isScalable() const { return Type == PPR || Type == ZPR; }
};
} // end anonymous namespace
@ -1842,7 +1945,8 @@ static void computeCalleeSaveRegisterPairs(
CC == CallingConv::PreserveMost ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();
int ByteOffset = AFI->getCalleeSavedStackSize();
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
// On Linux, we will have either one or zero non-paired register. On Windows
// with CFI, we can have multiple unpaired registers in order to utilize the
// available unwind codes. This flag assures that the alignment fixup is done
@ -1858,6 +1962,10 @@ static void computeCalleeSaveRegisterPairs(
RPI.Type = RegPairInfo::FPR64;
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR128;
else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::ZPR;
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::PPR;
else
llvm_unreachable("Unsupported register class.");
@ -1880,6 +1988,9 @@ static void computeCalleeSaveRegisterPairs(
if (AArch64::FPR128RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::PPR:
case RegPairInfo::ZPR:
break;
}
}
@ -1917,23 +2028,33 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
Offset -= RPI.isPaired() ? 2 * Scale : Scale;
int Scale = RPI.getScale();
if (RPI.isScalable())
ScalableByteOffset -= Scale;
else
ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale;
assert(!(RPI.isScalable() && RPI.isPaired()) &&
"Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
!RPI.isPaired()) {
FixupDone = true;
Offset -= 8;
assert(Offset % 16 == 0);
ByteOffset -= 8;
assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
}
int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(Offset % Scale == 0);
RPI.Offset = Offset / Scale;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
@ -2025,6 +2146,16 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
Size = 16;
Align = 16;
break;
case RegPairInfo::ZPR:
StrOpc = AArch64::STR_ZXI;
Size = 16;
Align = 16;
break;
case RegPairInfo::PPR:
StrOpc = AArch64::STR_PXI;
Size = 2;
Align = 2;
break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@ -2065,6 +2196,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector);
}
return true;
}
@ -2116,6 +2252,16 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
Size = 16;
Align = 16;
break;
case RegPairInfo::ZPR:
LdrOpc = AArch64::LDR_ZXI;
Size = 16;
Align = 16;
break;
case RegPairInfo::PPR:
LdrOpc = AArch64::LDR_PXI;
Size = 2;
Align = 2;
break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@ -2150,11 +2296,19 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
if (ReverseCSRRestoreSeq)
// SVE objects are always restored in reverse order.
for (const RegPairInfo &RPI : reverse(RegPairs))
if (RPI.isScalable())
EmitMI(RPI);
else
if (ReverseCSRRestoreSeq) {
for (const RegPairInfo &RPI : reverse(RegPairs))
if (!RPI.isScalable())
EmitMI(RPI);
} else
for (const RegPairInfo &RPI : RegPairs)
if (!RPI.isScalable())
EmitMI(RPI);
if (NeedShadowCallStackProlog) {
@ -2202,7 +2356,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(Reg);
bool RegUsed = SavedRegs.test(Reg);
unsigned PairedReg = CSRegs[i ^ 1];
unsigned PairedReg = AArch64::NoRegister;
if (AArch64::GPR64RegClass.contains(Reg) ||
AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg))
PairedReg = CSRegs[i ^ 1];
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
@ -2226,10 +2385,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
unsigned SVECSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned Reg : SavedRegs.set_bits())
CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
for (unsigned Reg : SavedRegs.set_bits()) {
auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
if (AArch64::PPRRegClass.contains(Reg) ||
AArch64::ZPRRegClass.contains(Reg))
SVECSStackSize += RegSize;
else
CSStackSize += RegSize;
}
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
@ -2249,10 +2415,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned MaxAlign = getStackAlignment();
int64_t SVEStackSize =
alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@ -2313,6 +2477,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
bool AArch64FrameLowering::enableStackSlotScavenging(
@ -2321,9 +2486,39 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
return AFI->hasCalleeSaveStackFreeSpace();
}
int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
unsigned &MaxAlign) const {
// Process all fixed stack objects.
/// returns true if there are any SVE callee saves.
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
int &Min, int &Max) {
if (!MFI.isCalleeSavedInfoValid())
return false;
Min = std::numeric_limits<int>::max();
Max = std::numeric_limits<int>::min();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (auto &CS : CSI) {
if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
AArch64::PPRRegClass.contains(CS.getReg())) {
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
}
return Min != std::numeric_limits<int>::max();
}
// Process all the SVE stack objects and determine offsets for each
// object. If AssignOffsets is true, the offsets get assigned.
// Fills in the first and last callee-saved frame indices into
// Min/MaxCSFrameIndex, respectively.
// Returns the size of the stack.
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int &MinCSFrameIndex,
int &MaxCSFrameIndex,
bool AssignOffsets) {
// First process all fixed stack objects.
int64_t Offset = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
@ -2332,12 +2527,41 @@ int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
Offset = FixedOffset;
}
// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
// Make sure to align the last callee save slot.
MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
// Assign offsets to the callee save slots.
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
Offset = alignTo(Offset, MFI.getObjectAlignment(I));
if (AssignOffsets) {
LLVM_DEBUG(dbgs() << "alloc FI(" << I << ") at SP[" << Offset
<< "]\n");
MFI.setObjectOffset(I, -Offset);
}
}
}
// Note: We don't take allocatable stack objects into
// account yet, because allocation for those is not yet
// implemented.
return Offset;
}
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
MachineFrameInfo &MFI) const {
int MinCSFrameIndex, MaxCSFrameIndex;
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
}
int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
true);
}
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
@ -2345,12 +2569,13 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
unsigned MaxAlign = getStackAlignment();
int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
int MinCSFrameIndex, MaxCSFrameIndex;
int64_t SVEStackSize =
assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.

View File

@ -101,7 +101,11 @@ public:
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
};
} // End llvm namespace

View File

@ -3118,6 +3118,9 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::AArch64_SVE_VectorCall:
// Calling SVE functions is currently not yet supported.
report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:

View File

@ -53,8 +53,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Amount of stack frame size, not including callee-saved registers.
unsigned LocalStackSize;
/// The start and end frame indices for the SVE callee saves.
int MinSVECSFrameIndex;
int MaxSVECSFrameIndex;
/// Amount of stack frame size used for saving callee-saved registers.
unsigned CalleeSavedStackSize;
unsigned SVECalleeSavedStackSize;
bool HasCalleeSavedStackSize = false;
/// Number of TLS accesses using the special (combinable)
@ -161,7 +166,6 @@ public:
void setCalleeSaveStackHasFreeSpace(bool s) {
CalleeSaveStackHasFreeSpace = s;
}
bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
@ -218,6 +222,22 @@ public:
return CalleeSavedStackSize;
}
// Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
void setSVECalleeSavedStackSize(unsigned Size) {
SVECalleeSavedStackSize = Size;
}
unsigned getSVECalleeSavedStackSize() const {
return SVECalleeSavedStackSize;
}
void setMinMaxSVECSFrameIndex(int Min, int Max) {
MinSVECSFrameIndex = Min;
MaxSVECSFrameIndex = Max;
}
int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
return NumLocalDynamicTLSAccesses;

View File

@ -55,6 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
return CSR_AArch64_SVE_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@ -125,7 +127,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::AArch64_VectorCall)
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (CC == CallingConv::AArch64_SVE_VectorCall)
return CSR_AArch64_SVE_AAPCS_RegMask;
return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
: CSR_AArch64_SVE_AAPCS_RegMask;
if (CC == CallingConv::CFGuard_Check)
return CSR_Win_AArch64_CFGuard_Check_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()

View File

@ -30,6 +30,10 @@
define void @test_address_sve_fp() nounwind { entry: unreachable }
define void @test_stack_arg_sve() nounwind { entry: unreachable }
define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable }
...
# +----------+
@ -328,3 +332,183 @@ body: |
RET_ReallyLR
---
...
# CHECK-LABEL: name: save_restore_pregs_sve
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK: frame-setup STR_PXI killed $p6, $sp, 5
# CHECK: frame-setup STR_PXI killed $p5, $sp, 6
# CHECK: frame-setup STR_PXI killed $p4, $sp, 7
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
# CHECK: $p6 = frame-destroy LDR_PXI $sp, 5
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 6
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 7
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1
# CHECK: RET_ReallyLR
name: save_restore_pregs_sve
stack:
- { id: 0, stack-id: default, size: 32, alignment: 16 }
body: |
bb.0.entry:
$p4 = IMPLICIT_DEF
$p5 = IMPLICIT_DEF
$p6 = IMPLICIT_DEF
RET_ReallyLR
---
...
# CHECK-LABEL: name: save_restore_zregs_sve
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -3
# CHECK: frame-setup STR_ZXI killed $z10, $sp, 0
# CHECK: frame-setup STR_ZXI killed $z9, $sp, 1
# CHECK: frame-setup STR_ZXI killed $z8, $sp, 2
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
# CHECK: $z10 = frame-destroy LDR_ZXI $sp, 0
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 1
# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 2
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 3
# CHECK: RET_ReallyLR
name: save_restore_zregs_sve
stack:
- { id: 0, stack-id: default, size: 32, alignment: 16 }
body: |
bb.0.entry:
$z8 = IMPLICIT_DEF
$z9 = IMPLICIT_DEF
$z10 = IMPLICIT_DEF
RET_ReallyLR
---
...
# Test allocation/deallocation of the stack frame together with the
# saving/restoring of callee save registers. Fixed-stack objects
# are allocated before the callee-saves.
# This also adds some non-SVE callee-saves, to ensure that those are
# paired correctly.
#
# CHECK-LABEL: name: save_restore_sve
# CHECK: $sp = frame-setup STPXpre killed ${{[a-z0-9]+}}, killed $x21, $sp, -4
# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 2
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -19
# CHECK: frame-setup STR_PXI killed $p15, $sp, 4
# CHECK: frame-setup STR_PXI killed $p14, $sp, 5
# CHECK: frame-setup STR_PXI killed $p5, $sp, 14
# CHECK: frame-setup STR_PXI killed $p4, $sp, 15
# CHECK: frame-setup STR_ZXI killed $z23, $sp, 2
# CHECK: frame-setup STR_ZXI killed $z22, $sp, 3
# CHECK: frame-setup STR_ZXI killed $z9, $sp, 16
# CHECK: frame-setup STR_ZXI killed $z8, $sp, 17
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
# CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 19
# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2
# CHECK: $sp, ${{[a-z0-9]+}}, $x21 = frame-destroy LDPXpost $sp, 4
# CHECK: RET_ReallyLR
name: save_restore_sve
fixedStack:
- { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
stack:
- { id: 0, stack-id: default, size: 32, alignment: 16 }
body: |
bb.0.entry:
$z8_z9_z10_z11 = IMPLICIT_DEF
$z12_z13_z14_z15 = IMPLICIT_DEF
$z16_z17_z18_z19 = IMPLICIT_DEF
$z20_z21_z22_z23 = IMPLICIT_DEF
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
$p4 = IMPLICIT_DEF
$p5 = IMPLICIT_DEF
$p6 = IMPLICIT_DEF
$p7 = IMPLICIT_DEF
$p8 = IMPLICIT_DEF
$p9 = IMPLICIT_DEF
$p10 = IMPLICIT_DEF
$p11 = IMPLICIT_DEF
$p12 = IMPLICIT_DEF
$p13 = IMPLICIT_DEF
$p14 = IMPLICIT_DEF
$p15 = IMPLICIT_DEF
$x19 = IMPLICIT_DEF
$x20 = IMPLICIT_DEF
$x21 = IMPLICIT_DEF
RET_ReallyLR
---
...
# Test allocation/deallocation of the stack frame together with the
# saving/restoring of callee save registers. Fixed-stack objects
# are allocated before the callee-saves.
#
# CHECK-LABEL: name: save_restore_sve_realign
# CHECK: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -19
# CHECK-NEXT: STR_PXI killed $p15, $sp, 4
# CHECK-NEXT: STR_PXI killed $p14, $sp, 5
# CHECK: STR_PXI killed $p5, $sp, 14
# CHECK-NEXT: STR_PXI killed $p4, $sp, 15
# CHECK-NEXT: STR_ZXI killed $z23, $sp, 2
# CHECK-NEXT: STR_ZXI killed $z22, $sp, 3
# CHECK: STR_ZXI killed $z9, $sp, 16
# CHECK-NEXT: STR_ZXI killed $z8, $sp, 17
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -19
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15
# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2
# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
# CHECK-NEXT: RET_ReallyLR
name: save_restore_sve_realign
fixedStack:
- { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
stack:
- { id: 0, stack-id: default, size: 16, alignment: 32 }
body: |
bb.0.entry:
$z8_z9_z10_z11 = IMPLICIT_DEF
$z12_z13_z14_z15 = IMPLICIT_DEF
$z16_z17_z18_z19 = IMPLICIT_DEF
$z20_z21_z22_z23 = IMPLICIT_DEF
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
$p4 = IMPLICIT_DEF
$p5 = IMPLICIT_DEF
$p6 = IMPLICIT_DEF
$p7 = IMPLICIT_DEF
$p8 = IMPLICIT_DEF
$p9 = IMPLICIT_DEF
$p10 = IMPLICIT_DEF
$p11 = IMPLICIT_DEF
$p12 = IMPLICIT_DEF
$p13 = IMPLICIT_DEF
$p14 = IMPLICIT_DEF
$p15 = IMPLICIT_DEF
RET_ReallyLR
---