mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""
This reverts commit bc9a29b9ee6ade4894252b1470977142c32b4602. The reasoning that this patch was wrong was itself incorrect (see discussion on llvm-commits). This patch does seem to be exposing a latent SVE code generation bug on non-public tests, which should not block a correctness fix for public, non-SVE use cases.
This commit is contained in:
parent
deb30c80fe
commit
f252e214b7
@ -1024,10 +1024,6 @@ static bool needsWinCFI(const MachineFunction &MF) {
|
||||
F.needsUnwindTableEntry();
|
||||
}
|
||||
|
||||
/// Returns true when the AArch64 subtarget of \p MF reports a Darwin target.
static bool isTargetDarwin(const MachineFunction &MF) {
|
||||
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
|
||||
}
|
||||
|
||||
/// Returns true when the AArch64 subtarget of \p MF reports a Windows target.
static bool isTargetWindows(const MachineFunction &MF) {
|
||||
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
|
||||
}
|
||||
@ -1185,7 +1181,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// For funclets the FP belongs to the containing function.
|
||||
if (!IsFunclet && HasFP) {
|
||||
// Only set up FP if we actually need to.
|
||||
int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
|
||||
int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
|
||||
|
||||
if (CombineSPBump)
|
||||
FPOffset += AFI->getLocalStackSize();
|
||||
@ -1409,11 +1405,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
if (needsFrameMoves) {
|
||||
const DataLayout &TD = MF.getDataLayout();
|
||||
const int StackGrowth = isTargetDarwin(MF)
|
||||
? (2 * -TD.getPointerSize(0))
|
||||
: -AFI->getCalleeSavedStackSize();
|
||||
Register FramePtr = RegInfo->getFrameRegister(MF);
|
||||
// An example of the prologue:
|
||||
//
|
||||
// .globl __foo
|
||||
@ -1481,10 +1472,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// .cfi_offset w28, -32
|
||||
|
||||
if (HasFP) {
|
||||
const int OffsetToFirstCalleeSaveFromFP =
|
||||
AFI->getCalleeSaveBaseToFrameRecordOffset() -
|
||||
AFI->getCalleeSavedStackSize();
|
||||
Register FramePtr = RegInfo->getFrameRegister(MF);
|
||||
|
||||
// Define the current CFA rule to use the provided FP.
|
||||
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
|
||||
unsigned CFIIndex = MF.addFrameInst(
|
||||
MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
|
||||
MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex)
|
||||
.setMIFlags(MachineInstr::FrameSetup);
|
||||
@ -1775,10 +1771,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
// non-post-indexed loads for the restores if we aren't actually going to
|
||||
// be able to save any instructions.
|
||||
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
|
||||
int64_t OffsetToFrameRecord =
|
||||
isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
|
||||
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
|
||||
{OffsetToFrameRecord, MVT::i8},
|
||||
{-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
|
||||
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
|
||||
} else if (NumBytes)
|
||||
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
|
||||
@ -1839,11 +1833,11 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
|
||||
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
||||
bool IsWin64 =
|
||||
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
|
||||
|
||||
unsigned FixedObject =
|
||||
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
|
||||
unsigned FPAdjust = isTargetDarwin(MF)
|
||||
? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
|
||||
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
|
||||
int64_t FPAdjust =
|
||||
CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
|
||||
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
|
||||
}
|
||||
|
||||
@ -2231,6 +2225,14 @@ static void computeCalleeSaveRegisterPairs(
|
||||
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
|
||||
"Offset out of bounds for LDP/STP immediate");
|
||||
|
||||
// Save the offset to frame record so that the FP register can point to the
|
||||
// innermost frame record (spilled FP and LR registers).
|
||||
if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
|
||||
RPI.Reg2 == AArch64::FP) ||
|
||||
(IsWindows && RPI.Reg1 == AArch64::FP &&
|
||||
RPI.Reg2 == AArch64::LR)))
|
||||
AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
|
||||
|
||||
RegPairs.push_back(RPI);
|
||||
if (RPI.isPaired())
|
||||
++i;
|
||||
|
@ -3442,8 +3442,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
|
||||
|
||||
// First emit non-scalable frame offsets, or a simple 'mov'.
|
||||
if (Bytes || (!Offset && SrcReg != DestReg)) {
|
||||
assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
|
||||
"SP increment/decrement not 16-byte aligned");
|
||||
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
|
||||
"SP increment/decrement not 8-byte aligned");
|
||||
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
|
||||
if (Bytes < 0) {
|
||||
Bytes = -Bytes;
|
||||
|
@ -135,6 +135,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
|
||||
/// e.g. Tail Call, Thunk, or Function if none apply.
|
||||
Optional<std::string> OutliningStyle;
|
||||
|
||||
// Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
|
||||
// CalleeSavedStackSize) to the address of the frame record.
|
||||
int CalleeSaveBaseToFrameRecordOffset = 0;
|
||||
|
||||
public:
|
||||
AArch64FunctionInfo() = default;
|
||||
|
||||
@ -338,6 +342,13 @@ public:
|
||||
TaggedBasePointerOffset = Offset;
|
||||
}
|
||||
|
||||
/// Returns the offset from the callee-save base (SP at entry minus
/// CalleeSavedStackSize) to the address of the frame record. The backing
/// field is initialised to 0.
int getCalleeSaveBaseToFrameRecordOffset() const {
|
||||
return CalleeSaveBaseToFrameRecordOffset;
|
||||
}
|
||||
/// Stores \p Offset as the distance from the callee-save base to the frame
/// record, overwriting any previously stored value.
void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
|
||||
CalleeSaveBaseToFrameRecordOffset = Offset;
|
||||
}
|
||||
|
||||
private:
|
||||
// Hold the lists of LOHs.
|
||||
MILOHContainer LOHContainerSet;
|
||||
|
22
test/CodeGen/AArch64/framelayout-fp-csr.ll
Normal file
22
test/CodeGen/AArch64/framelayout-fp-csr.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
|
||||
|
||||
; The purpose of this test is to verify that frame pointer (x29)
|
||||
; is set up correctly in the presence of callee-saved floating
|
||||
; point registers. The frame pointer should point to the frame
|
||||
; record, which is located 16 bytes above the end of the CSR
|
||||
; space when a single FP CSR is in use.
|
||||
; Minimal function whose only work is an inline-asm "nop" that lists d8 as
; clobbered.
define void @test1(i32) #26 {
|
||||
entry:
|
||||
call void asm sideeffect "nop", "~{d8}"() #26
|
||||
ret void
|
||||
}
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: str d8, [sp, #-32]!
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #16]
|
||||
; CHECK-NEXT: add x29, sp, #16
|
||||
; CHECK: nop
|
||||
; CHECK: ldp x29, x30, [sp, #16]
|
||||
; CHECK-NEXT: ldr d8, [sp], #32
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
attributes #26 = { nounwind }
|
29
test/CodeGen/AArch64/framelayout-frame-record.mir
Normal file
29
test/CodeGen/AArch64/framelayout-frame-record.mir
Normal file
@ -0,0 +1,29 @@
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: TestFrameRecordLocation
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
isFrameAddressTaken: true
|
||||
body: |
|
||||
bb.0:
|
||||
$d8 = IMPLICIT_DEF
|
||||
$d9 = IMPLICIT_DEF
|
||||
$x19 = IMPLICIT_DEF
|
||||
RET_ReallyLR
|
||||
|
||||
# CHECK-LABEL: TestFrameRecordLocation
|
||||
|
||||
# CHECK: stp d9, d8, [sp, #-48]!
|
||||
# CHECK: stp x29, x30, [sp, #16]
|
||||
# CHECK: str x19, [sp, #32]
|
||||
|
||||
# CHECK: add x29, sp, #16
|
||||
|
||||
# CHECK: .cfi_def_cfa w29, 32
|
||||
# CHECK: .cfi_offset w19, -16
|
||||
# CHECK: .cfi_offset w30, -24
|
||||
# CHECK: .cfi_offset w29, -32
|
||||
# CHECK: .cfi_offset b8, -40
|
||||
# CHECK: .cfi_offset b9, -48
|
||||
...
|
42
test/CodeGen/AArch64/framelayout-unaligned-fp.ll
Normal file
42
test/CodeGen/AArch64/framelayout-unaligned-fp.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; The purpose of this test is to construct a scenario where an odd number
|
||||
; of callee-saved GPRs as well as an odd number of callee-saved FPRs are
|
||||
; used. This caused the frame pointer to be aligned to a multiple of 8
|
||||
; on non-Darwin platforms, rather than a multiple of 16 as usual.
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
@a = global i64 0, align 4
|
||||
|
||||
|
||||
; Calls @d, @e and @f around an alloca and returns the result of @e after a
; round trip through float.
define i64 @b() {
|
||||
entry:
|
||||
%call = tail call i64 @d()
|
||||
; The allocation size is the address of @d converted to an integer rather
; than a literal constant.
%0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16
|
||||
%1 = ptrtoint i8* %0 to i64
|
||||
store i64 %1, i64* @a, align 4
|
||||
%call1 = call i64 @e()
|
||||
; The float value is produced before the call to @f and consumed after it.
%conv = sitofp i64 %call1 to float
|
||||
%2 = load i64, i64* @a, align 4
|
||||
%call2 = call i64 @f(i64 %2)
|
||||
%conv3 = fptosi float %conv to i64
|
||||
ret i64 %conv3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: b:
|
||||
; CHECK: str d8, [sp, #-32]!
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #8]
|
||||
; CHECK-NEXT: str x19, [sp, #24]
|
||||
; CHECK-NEXT: add x29, sp, #8
|
||||
|
||||
; CHECK: sub sp, x29, #8
|
||||
; CHECK-NEXT: ldr x19, [sp, #24]
|
||||
; CHECK-NEXT: ldp x29, x30, [sp, #8]
|
||||
; CHECK-NEXT: ldr d8, [sp], #32
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
declare i64 @d()
|
||||
declare i64 @e()
|
||||
declare i64 @f(i64)
|
Loading…
x
Reference in New Issue
Block a user