1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[AArch64] Support adding X[8-15,18] registers as CSRs.

Summary:
Specifying X[8-15,18] registers as callee-saved is used to support
CONFIG_ARM64_LSE_ATOMICS in Linux kernel. As part of this patch we:
- use custom CSR list/mask when user specifies custom CSRs
- update Machine Register Info's list of CSRs with additional custom CSRs in
LowerCall and LowerFormalArguments.

Reviewers: srhines, nickdesaulniers, efriedma, javed.absar

Reviewed By: nickdesaulniers

Subscribers: kristof.beyls, jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D52216

llvm-svn: 342824
This commit is contained in:
Tri Vo 2018-09-22 22:17:50 +00:00
parent b74da1bf4f
commit ae0244420d
11 changed files with 253 additions and 7 deletions

View File

@ -104,6 +104,10 @@ foreach i = {1-7,18,20} in
"Reserve X"#i#", making it unavailable "
"as a GPR">;
// Allow X[8-15,18] to be marked callee-saved via -mattr=+call-saved-x<N>.
// Per the commit description this supports CONFIG_ARM64_LSE_ATOMICS in the
// Linux kernel; each feature sets CustomCallSavedXRegs[i] on the subtarget.
foreach i = {8-15,18} in
def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
"CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;

View File

@ -337,6 +337,10 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (Subtarget.hasCustomCallingConv())
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@ -378,7 +382,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
MIB.addRegMask(Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);

View File

@ -2918,6 +2918,9 @@ bool AArch64FastISel::fastLowerArguments() {
if (CC != CallingConv::C && CC != CallingConv::Swift)
return false;
if (Subtarget->hasCustomCallingConv())
return false;
// Only handle simple cases of up to 8 GPR and FPR each.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;

View File

@ -356,7 +356,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@ -1541,7 +1541,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()

View File

@ -3106,6 +3106,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return Chain;
}
@ -3336,6 +3339,10 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (Subtarget->hasCustomCallingConv()) {
TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
}
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
@ -3729,6 +3736,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
@ -4021,8 +4031,10 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
const uint32_t *Mask =
Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getTLSCallPreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@ -7745,8 +7757,10 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
const uint32_t *Mask =
Subtarget->getRegisterInfo()->getWindowsStackProbePreservedMask();
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));

View File

@ -75,6 +75,23 @@ const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
return nullptr;
}
/// Rebuild MF's callee-saved register list so that every X register the
/// subtarget marks as custom callee-saved is appended to the default CSR
/// list, then install the result in Machine Register Info.
void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
    MachineFunction &MF) const {
  SmallVector<MCPhysReg, 32> NewCSRs;

  // Start from the default CSR list for this function (zero-terminated).
  for (const MCPhysReg *Reg = getCalleeSavedRegs(&MF); *Reg; ++Reg)
    NewCSRs.push_back(*Reg);

  // Append each X register the user requested to be treated as callee-saved.
  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
  const unsigned NumXRegs = AArch64::GPR64commonRegClass.getNumRegs();
  for (size_t Idx = 0; Idx < NumXRegs; ++Idx)
    if (STI.isXRegCustomCalleeSaved(Idx))
      NewCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(Idx));

  // Register lists are zero-terminated.
  NewCSRs.push_back(0);
  MF.getRegInfo().setCalleeSavedRegs(NewCSRs);
}
const TargetRegisterClass *
AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const {
@ -122,6 +139,26 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
return CSR_AArch64_TLS_ELF_RegMask;
}
/// Clone the call-preserved register mask into function-lifetime storage and
/// set the preserved bit for every custom callee-saved X register (and all of
/// its sub/super-registers). On return, *Mask points at the updated copy.
void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
const uint32_t **Mask) const {
uint32_t *UpdatedMask = MF.allocateRegMask();
unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
// Copy RegMaskSize 32-bit mask words. Note: sizeof(UpdatedMask[0]) is the
// element size (uint32_t); using sizeof(Mask[0]) here would be the size of a
// pointer, over-reading the source mask and overrunning the allocated buffer
// by 2x on 64-bit hosts.
memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize);
for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
this, true);
SubReg.isValid(); ++SubReg) {
// See TargetRegisterInfo::getCallPreservedMask for how to interpret the
// register mask: bit N of word N/32 marks register N as preserved.
UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
}
}
}
*Mask = UpdatedMask;
}
const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {

View File

@ -34,6 +34,10 @@ public:
bool isAnyArgRegReserved(const MachineFunction &MF) const;
void emitReservedArgRegCallError(const MachineFunction &MF) const;
void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const;
void UpdateCustomCallPreservedMask(MachineFunction &MF,
const uint32_t **Mask) const;
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *

View File

@ -153,6 +153,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),

View File

@ -142,6 +142,9 @@ protected:
// ReserveXRegister[i] - X#i is not available as a general purpose register.
BitVector ReserveXRegister;
// CustomCallSavedXRegs[i] - X#i is callee saved.
BitVector CustomCallSavedXRegs;
bool IsLittle;
/// TargetTriple - What processor and OS we're targeting.
@ -229,6 +232,10 @@ public:
bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
bool isXRegCustomCalleeSaved(size_t i) const {
return CustomCallSavedXRegs[i];
}
bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }

View File

@ -0,0 +1,141 @@
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x8 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X8
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x9 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X9
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x10 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X10
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x11 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X11
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x12 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X12
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x13 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X13
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x14 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X14
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x15 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X15
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x18 -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-SAVED-X18
; Test all call-saved-x# options together.
; RUN: llc -mtriple=arm64-linux-gnu \
; RUN: -mattr=+call-saved-x8 \
; RUN: -mattr=+call-saved-x9 \
; RUN: -mattr=+call-saved-x10 \
; RUN: -mattr=+call-saved-x11 \
; RUN: -mattr=+call-saved-x12 \
; RUN: -mattr=+call-saved-x13 \
; RUN: -mattr=+call-saved-x14 \
; RUN: -mattr=+call-saved-x15 \
; RUN: -mattr=+call-saved-x18 \
; RUN: -o - %s | FileCheck %s \
; RUN: --check-prefix=CHECK-SAVED-ALL
; Test GlobalISel.
; RUN: llc -mtriple=arm64-linux-gnu \
; RUN: -mattr=+call-saved-x8 \
; RUN: -mattr=+call-saved-x9 \
; RUN: -mattr=+call-saved-x10 \
; RUN: -mattr=+call-saved-x11 \
; RUN: -mattr=+call-saved-x12 \
; RUN: -mattr=+call-saved-x13 \
; RUN: -mattr=+call-saved-x14 \
; RUN: -mattr=+call-saved-x15 \
; RUN: -mattr=+call-saved-x18 \
; RUN: -global-isel \
; RUN: -o - %s | FileCheck %s \
; RUN: --check-prefix=CHECK-SAVED-ALL
; Used to exhaust the supply of GPRs.
@var = global [30 x i64] zeroinitializer
; Check that callee preserves additional CSRs.
; A callee must save/restore any custom callee-saved X register it uses. The
; volatile load/store of 30 i64s below exhausts the allocatable GPRs (see
; @var's comment), forcing the custom CSRs into use so the spill/restore code
; in the prologue/epilogue can be matched.
define void @callee() {
; CHECK-LABEL: callee
; CHECK-SAVED-X8: str x8, [sp
; CHECK-SAVED-X9: str x9, [sp
; CHECK-SAVED-X10: str x10, [sp
; CHECK-SAVED-X11: str x11, [sp
; CHECK-SAVED-X12: str x12, [sp
; CHECK-SAVED-X13: str x13, [sp
; CHECK-SAVED-X14: str x14, [sp
; CHECK-SAVED-X15: str x15, [sp
; CHECK-SAVED-X18: str x18, [sp
; CHECK-SAVED-ALL: str x18, [sp
; CHECK-SAVED-ALL-NEXT: stp x15, x14, [sp
; CHECK-SAVED-ALL-NEXT: stp x13, x12, [sp
; CHECK-SAVED-ALL-NEXT: stp x11, x10, [sp
; CHECK-SAVED-ALL-NEXT: stp x9, x8, [sp
%val = load volatile [30 x i64], [30 x i64]* @var
store volatile [30 x i64] %val, [30 x i64]* @var
; CHECK-SAVED-ALL: ldp x9, x8, [sp
; CHECK-SAVED-ALL-NEXT: ldp x11, x10, [sp
; CHECK-SAVED-ALL-NEXT: ldp x13, x12, [sp
; CHECK-SAVED-ALL-NEXT: ldp x15, x14, [sp
; CHECK-SAVED-ALL-NEXT: ldr x18, [sp
; CHECK-SAVED-X8: ldr x8, [sp
; CHECK-SAVED-X9: ldr x9, [sp
; CHECK-SAVED-X10: ldr x10, [sp
; CHECK-SAVED-X11: ldr x11, [sp
; CHECK-SAVED-X12: ldr x12, [sp
; CHECK-SAVED-X13: ldr x13, [sp
; CHECK-SAVED-X14: ldr x14, [sp
; CHECK-SAVED-X15: ldr x15, [sp
; CHECK-SAVED-X18: ldr x18, [sp
ret void
}
; Check that caller doesn't shy away from allocating additional CSRs.
; A caller may freely keep values live in the custom CSRs across the call to
; @callee, since the callee is now responsible for preserving them. The
; directives verify the custom X registers hold live values around "bl callee".
define void @caller() {
; CHECK-LABEL: caller
%val = load volatile [30 x i64], [30 x i64]* @var
; CHECK-SAVED-X8: adrp x8, var
; CHECK-SAVED-X9: adrp x9, var
; CHECK-SAVED-X10: adrp x10, var
; CHECK-SAVED-X11: adrp x11, var
; CHECK-SAVED-X12: adrp x12, var
; CHECK-SAVED-X13: adrp x13, var
; CHECK-SAVED-X14: adrp x14, var
; CHECK-SAVED-X15: adrp x15, var
; CHECK-SAVED-X18: adrp x18, var
; CHECK-SAVED-ALL: adrp x8, var
; CHECK-SAVED-ALL-DAG: ldr x9
; CHECK-SAVED-ALL-DAG: ldr x10
; CHECK-SAVED-ALL-DAG: ldr x11
; CHECK-SAVED-ALL-DAG: ldr x12
; CHECK-SAVED-ALL-DAG: ldr x13
; CHECK-SAVED-ALL-DAG: ldr x14
; CHECK-SAVED-ALL-DAG: ldr x15
; CHECK-SAVED-ALL-DAG: ldr x18
call void @callee()
; CHECK: bl callee
store volatile [30 x i64] %val, [30 x i64]* @var
; CHECK-SAVED-ALL-DAG: str x9
; CHECK-SAVED-ALL-DAG: str x10
; CHECK-SAVED-ALL-DAG: str x11
; CHECK-SAVED-ALL-DAG: str x12
; CHECK-SAVED-ALL-DAG: str x13
; CHECK-SAVED-ALL-DAG: str x14
; CHECK-SAVED-ALL-DAG: str x15
; CHECK-SAVED-ALL-DAG: str x18
ret void
}

View File

@ -0,0 +1,28 @@
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x18 -mattr=+call-saved-x18 \
; RUN: -o - %s | FileCheck %s
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x18 -mattr=+call-saved-x18 \
; RUN: -global-isel \
; RUN: -o - %s | FileCheck %s
; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x18 -mattr=+call-saved-x18 \
; RUN: -fast-isel \
; RUN: -o - %s | FileCheck %s
; Used to exhaust the supply of GPRs.
@var = global [30 x i64] zeroinitializer
; If a register is specified to be both reserved and callee-saved, then it
; should not be allocated and should not be spilled onto the stack.
; reserve-x18 wins over call-saved-x18: x18 is never allocated, so despite
; heavy register pressure from the 30 x i64 load/store it must appear in no
; spill, reload, or ordinary use.
define void @foo() {
; CHECK-NOT: str x18, [sp
%val = load volatile [30 x i64], [30 x i64]* @var
store volatile [30 x i64] %val, [30 x i64]* @var
; CHECK-NOT: ldr x18
; CHECK-NOT: str x18
; CHECK-NOT: ldr x18, [sp
ret void
}