1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""

This reverts commit bc9a29b9ee6ade4894252b1470977142c32b4602.

The reasoning that this patch was wrong was itself incorrect
(see discussion on llvm-commits). This patch does seem to be exposing
a latent SVE code generation bug on non-public tests, which should
not block a correctness fix for public, non-SVE use cases.
This commit is contained in:
Owen Anderson 2020-09-01 19:25:30 +00:00
parent deb30c80fe
commit f252e214b7
6 changed files with 125 additions and 19 deletions

View File

@ -1024,10 +1024,6 @@ static bool needsWinCFI(const MachineFunction &MF) {
F.needsUnwindTableEntry();
}
static bool isTargetDarwin(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
@ -1185,7 +1181,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// For funclets the FP belongs to the containing function.
if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@ -1409,11 +1405,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = isTargetDarwin(MF)
? (2 * -TD.getPointerSize(0))
: -AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
@ -1481,10 +1472,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset w28, -32
if (HasFP) {
const int OffsetToFirstCalleeSaveFromFP =
AFI->getCalleeSaveBaseToFrameRecordOffset() -
AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo->getFrameRegister(MF);
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@ -1775,10 +1771,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
int64_t OffsetToFrameRecord =
isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
{OffsetToFrameRecord, MVT::i8},
{-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
@ -1839,11 +1833,11 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
unsigned FPAdjust = isTargetDarwin(MF)
? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t FPAdjust =
CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
}
@ -2231,6 +2225,14 @@ static void computeCalleeSaveRegisterPairs(
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
// Save the offset to frame record so that the FP register can point to the
// innermost frame record (spilled FP and LR registers).
if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
RPI.Reg2 == AArch64::FP) ||
(IsWindows && RPI.Reg1 == AArch64::FP &&
RPI.Reg2 == AArch64::LR)))
AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
RegPairs.push_back(RPI);
if (RPI.isPaired())
++i;

View File

@ -3442,8 +3442,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
"SP increment/decrement not 16-byte aligned");
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
"SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;

View File

@ -135,6 +135,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// e.g. Tail Call, Thunk, or Function if none apply.
Optional<std::string> OutliningStyle;
// Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
// CalleeSavedStackSize) to the address of the frame record.
int CalleeSaveBaseToFrameRecordOffset = 0;
public:
AArch64FunctionInfo() = default;
@ -338,6 +342,13 @@ public:
TaggedBasePointerOffset = Offset;
}
int getCalleeSaveBaseToFrameRecordOffset() const {
return CalleeSaveBaseToFrameRecordOffset;
}
void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
CalleeSaveBaseToFrameRecordOffset = Offset;
}
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;

View File

@ -0,0 +1,22 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
; The purpose of this test is to verify that frame pointer (x29)
; is correctly setup in the presence of callee-saved floating
; point registers. The frame pointer should point to the frame
; record, which is located 16 bytes above the end of the CSR
; space when a single FP CSR is in use.
define void @test1(i32) #26 {
entry:
call void asm sideeffect "nop", "~{d8}"() #26
ret void
}
; CHECK-LABEL: test1:
; CHECK: str d8, [sp, #-32]!
; CHECK-NEXT: stp x29, x30, [sp, #16]
; CHECK-NEXT: add x29, sp, #16
; CHECK: nop
; CHECK: ldp x29, x30, [sp, #16]
; CHECK-NEXT: ldr d8, [sp], #32
; CHECK-NEXT: ret
attributes #26 = { nounwind }

View File

@ -0,0 +1,29 @@
# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s
---
name: TestFrameRecordLocation
tracksRegLiveness: true
frameInfo:
isFrameAddressTaken: true
body: |
bb.0:
$d8 = IMPLICIT_DEF
$d9 = IMPLICIT_DEF
$x19 = IMPLICIT_DEF
RET_ReallyLR
# CHECK-LABEL: TestFrameRecordLocation
# CHECK: stp d9, d8, [sp, #-48]!
# CHECK: stp x29, x30, [sp, #16]
# CHECK: str x19, [sp, #32]
# CHECK: add x29, sp, #16
# CHECK: .cfi_def_cfa w29, 32
# CHECK: .cfi_offset w19, -16
# CHECK: .cfi_offset w30, -24
# CHECK: .cfi_offset w29, -32
# CHECK: .cfi_offset b8, -40
# CHECK: .cfi_offset b9, -48
...

View File

@ -0,0 +1,42 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; The purpose of this test is to construct a scenario where an odd number
; of callee-saved GPRs as well as an odd number of callee-saved FPRs are
; used. This caused the frame pointer to be aligned to a multiple of 8
; on non-Darwin platforms, rather than a multiple of 16 as usual.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
@a = global i64 0, align 4
define i64 @b() {
entry:
%call = tail call i64 @d()
%0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16
%1 = ptrtoint i8* %0 to i64
store i64 %1, i64* @a, align 4
%call1 = call i64 @e()
%conv = sitofp i64 %call1 to float
%2 = load i64, i64* @a, align 4
%call2 = call i64 @f(i64 %2)
%conv3 = fptosi float %conv to i64
ret i64 %conv3
}
; CHECK-LABEL: b:
; CHECK: str d8, [sp, #-32]!
; CHECK-NEXT: stp x29, x30, [sp, #8]
; CHECK-NEXT: str x19, [sp, #24]
; CHECK-NEXT: add x29, sp, #8
; CHECK: sub sp, x29, #8
; CHECK-NEXT: ldr x19, [sp, #24]
; CHECK-NEXT: ldp x29, x30, [sp, #8]
; CHECK-NEXT: ldr d8, [sp], #32
; CHECK-NEXT: ret
declare i64 @d()
declare i64 @e()
declare i64 @f(i64)