
[NFCI] Replace AArch64StackOffset by StackOffset.

This patch replaces the AArch64StackOffset class with the generic one
defined in TypeSize.h.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D88983
Sander de Smalen 2020-11-03 16:44:20 +00:00
parent 3104bbde01
commit ca12e64408
10 changed files with 184 additions and 373 deletions
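
The rewrite below is largely mechanical. In sketch form, the API mapping applied throughout the diff (all names taken from the changes themselves) is:

    StackOffset(N, MVT::i8)            ->  StackOffset::getFixed(N)
    StackOffset(N, MVT::nxv1i8)        ->  StackOffset::getScalable(N)
    StackOffset(F, MVT::i8)
      + StackOffset(S, MVT::nxv1i8)    ->  StackOffset::get(F, S)
    Off.getBytes()                     ->  Off.getFixed()
    Off.getScalableBytes()             ->  Off.getScalable()
    Off.getForFrameOffset(...)         ->  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(Off, ...)
    Off.getForDwarfOffset(...)         ->  AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Off, ...)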

include/llvm/Support/TypeSize.h

@ -117,17 +117,12 @@ public:
// StackOffset - Represent an offset with named fixed and scalable components.
//===----------------------------------------------------------------------===//
namespace NewStackOffset {
class StackOffset;
} // end namespace NewStackOffset
template <> struct LinearPolyBaseTypeTraits<NewStackOffset::StackOffset> {
template <> struct LinearPolyBaseTypeTraits<StackOffset> {
using ScalarTy = int64_t;
static constexpr unsigned Dimensions = 2;
};
namespace NewStackOffset {
/// StackOffset is a class to represent an offset with 2 dimensions,
/// named fixed and scalable, respectively. This class allows a value for both
/// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed
@ -151,9 +146,6 @@ public:
ScalarTy getScalable() const { return this->getValue(1); }
};
} // end namespace NewStackOffset
//===----------------------------------------------------------------------===//
// UnivariateLinearPolyBase - a base class for linear polynomials with multiple
// dimensions, but where only one dimension can be set at any time.
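
A minimal usage sketch of the generic class (illustrative, based on the getters shown above; not part of the change):

    StackOffset Off = StackOffset::getFixed(8) + StackOffset::getScalable(16);
    int64_t FixedPart    = Off.getFixed();     // 8
    int64_t ScalablePart = Off.getScalable();  // 16, i.e. "8 bytes and 16 scalable bytes"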

lib/Target/AArch64/AArch64FrameLowering.cpp

@ -116,7 +116,6 @@
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64StackOffset.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@ -274,7 +273,7 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@ -366,15 +365,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
TII);
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(Amount), TII);
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
// stack, we want to add it back if we have a reserved call frame.
assert(CalleePopAmount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
{-(int64_t)CalleePopAmount, MVT::i8}, TII);
StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
}
return MBB.erase(I);
}
@ -414,7 +413,8 @@ static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
int64_t NumBytes, NumVGScaledBytes;
OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes);
AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes,
NumVGScaledBytes);
std::string CommentBuffer = "sp";
llvm::raw_string_ostream Comment(CommentBuffer);
@ -441,7 +441,8 @@ MCCFIInstruction AArch64FrameLowering::createCfaOffset(
const TargetRegisterInfo &TRI, unsigned Reg,
const StackOffset &OffsetFromDefCFA) const {
int64_t NumBytes, NumVGScaledBytes;
OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes);
AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
@ -497,12 +498,12 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
StackOffset Offset;
if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) -
StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8);
Offset =
StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
} else {
Offset = {MFI.getObjectOffset(Info.getFrameIdx()) -
getOffsetOfLocalArea(),
MVT::i8};
Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) -
getOffsetOfLocalArea());
}
unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
@ -1107,8 +1108,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
{-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
false, NeedsWinCFI, &HasWinCFI);
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (!NeedsWinCFI && needsFrameMoves) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@ -1141,8 +1142,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
{-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
NeedsWinCFI, &HasWinCFI);
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
@ -1176,8 +1177,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
{FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false,
NeedsWinCFI, &HasWinCFI);
StackOffset::getFixed(FPOffset), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
@ -1287,7 +1288,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
AllocateBefore = {CalleeSavedSize, MVT::nxv1i8};
AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
AllocateAfter = SVEStackSize - AllocateBefore;
}
@ -1319,8 +1320,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
{-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
false, NeedsWinCFI, &HasWinCFI);
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
@ -1475,7 +1476,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
StackOffset TotalSize =
SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8);
SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
} else {
// Encode the stack size of the leaf function.
@ -1662,8 +1663,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
{NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI);
if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
@ -1687,7 +1689,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8};
StackOffset CalleeSavedSizeAsOffset =
StackOffset::getScalable(CalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
@ -1700,14 +1703,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be reloaded. The code below will deallocate the stack space
// space by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
{-CalleeSavedSize, MVT::nxv1i8}, TII,
StackOffset::getScalable(-CalleeSavedSize), TII,
MachineInstr::FrameDestroy);
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
{NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
StackOffset::getFixed(NumBytes), TII,
MachineInstr::FrameDestroy);
NumBytes = 0;
}
@ -1740,7 +1744,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
{StackRestoreBytes, MVT::i8}, TII,
StackOffset::getFixed(StackRestoreBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
if (Done) {
if (HasWinCFI) {
@ -1759,13 +1763,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
{-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
{NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI);
StackOffset::getFixed(NumBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@ -1786,7 +1791,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
{(int64_t)AfterCSRPopSize, MVT::i8}, TII,
StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
}
if (HasWinCFI)
@ -1806,7 +1811,7 @@ int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
/*PreferFP=*/
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
/*ForSimm=*/false)
.getBytes();
.getFixed();
}
int AArch64FrameLowering::getNonLocalFrameIndexReference(
@ -1814,7 +1819,8 @@ int AArch64FrameLowering::getNonLocalFrameIndexReference(
return getSEHFrameIndexOffset(MF, FI);
}
static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) {
static StackOffset getFPOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
@ -1824,12 +1830,13 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t FPAdjust =
CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset) {
static StackOffset getStackOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
return {ObjectOffset + (int64_t)MFI.getStackSize(), MVT::i8};
return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
@ -1838,8 +1845,8 @@ int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
MF.getSubtarget().getRegisterInfo());
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
? getFPOffset(MF, ObjectOffset).getBytes()
: getStackOffset(MF, ObjectOffset).getBytes();
? getFPOffset(MF, ObjectOffset).getFixed()
: getStackOffset(MF, ObjectOffset).getFixed();
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
@ -1862,8 +1869,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
int64_t FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
int64_t Offset = getStackOffset(MF, ObjectOffset).getBytes();
int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
@ -1938,19 +1945,16 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
int64_t OffsetFromSPToSVEArea =
MFI.getStackSize() - AFI->getCalleeSavedStackSize();
int64_t OffsetFromFPToSVEArea =
-AFI->getCalleeSaveBaseToFrameRecordOffset();
StackOffset FPOffset = StackOffset(OffsetFromFPToSVEArea, MVT::i8) +
StackOffset(ObjectOffset, MVT::nxv1i8);
StackOffset SPOffset = SVEStackSize +
StackOffset(ObjectOffset, MVT::nxv1i8) +
StackOffset(OffsetFromSPToSVEArea, MVT::i8);
StackOffset FPOffset =
StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) &&
(SPOffset.getBytes() ||
FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
(SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
RegInfo->needsStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@ -1969,7 +1973,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
return StackOffset::getFixed(FPOffset) + ScalableOffset;
}
// Use the base pointer if we have one.
@ -1986,7 +1990,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
return StackOffset(Offset, MVT::i8) + ScalableOffset;
return StackOffset::getFixed(Offset) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@ -2940,12 +2944,12 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
const int64_t kMaxOffset = 255 * 16;
Register BaseReg = FrameReg;
int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
if (BaseRegOffsetBytes < kMinOffset ||
BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
{BaseRegOffsetBytes, MVT::i8}, TII);
StackOffset::getFixed(BaseRegOffsetBytes), TII);
BaseReg = ScratchReg;
BaseRegOffsetBytes = 0;
}
@ -3002,7 +3006,7 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
LoopI->setFlags(FrameRegUpdateFlags);
int64_t ExtraBaseRegUpdate =
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
@ -3106,7 +3110,7 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
// realistically happens in function epilogue. Also, STGloop is expanded
// before that pass.
if (InsertI != MBB->end() &&
canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
&TotalOffset)) {
UpdateInstr = &*InsertI++;
LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "

lib/Target/AArch64/AArch64FrameLowering.h

@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#include "AArch64StackOffset.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {

lib/Target/AArch64/AArch64InstrInfo.cpp

@ -3429,6 +3429,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
});
}
void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
// The smallest scalable element supported by scaled SVE addressing
// modes are predicates, which are 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
// VGSized offsets are divided by '2', because the VG register is the
// the number of 64bit granules as opposed to 128bit vector chunks,
// which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
// So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
// VG = n * 2 and the dwarf offset must be VG * 8 bytes.
ByteSized = Offset.getFixed();
VGSized = Offset.getScalable() / 2;
}
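// Editorial worked example, not part of this commit: a purely scalable
// offset of 16 bytes occupies n x 16 bytes at runtime. With VG = n * 2,
// the DWARF expression must add VG * 8 bytes, so
//   decomposeStackOffsetForDwarfOffsets(StackOffset::getScalable(16),
//                                       ByteSized, VGSized);
// yields ByteSized == 0 and VGSized == 16 / 2 == 8 (and 8 * VG == n * 16).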
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
int64_t &NumDataVectors) {
// The smallest scalable element supported by scaled SVE addressing
// modes are predicates, which are 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
NumBytes = Offset.getFixed();
NumDataVectors = 0;
NumPredicateVectors = Offset.getScalable() / 2;
// This method is used to get the offsets to adjust the frame offset.
// If the function requires ADDPL to be used and needs more than two ADDPL
// instructions, part of the offset is folded into NumDataVectors so that it
// uses ADDVL for part of it, reducing the number of ADDPL instructions.
if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
NumPredicateVectors > 62) {
NumDataVectors = NumPredicateVectors / 8;
NumPredicateVectors -= NumDataVectors * 8;
}
}
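// Editorial worked example, not part of this commit: a scalable offset of
// 130 bytes gives NumPredicateVectors = 130 / 2 = 65. That is outside the
// ADDPL immediate range (> 62), so part of it is folded into ADDVL units:
// NumDataVectors = 65 / 8 = 8 and NumPredicateVectors = 65 - 8 * 8 = 1,
// i.e. one ADDVL #8 plus one ADDPL #1. This matches the G + H case in
// DecomposeStackOffsetTest.cpp below.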
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@ -3538,7 +3579,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
int64_t Bytes, NumPredicateVectors, NumDataVectors;
Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
Offset, Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
@ -3798,7 +3840,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
// Construct the complete offset.
bool IsMulVL = ScaleValue.isScalable();
unsigned Scale = ScaleValue.getKnownMinSize();
int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@ -3840,11 +3882,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
*OutUnscaledOp = *UnscaledOp;
if (IsMulVL)
SOffset = StackOffset(Offset, MVT::nxv1i8) +
StackOffset(SOffset.getBytes(), MVT::i8);
SOffset = StackOffset::get(SOffset.getFixed(), Offset);
else
SOffset = StackOffset(Offset, MVT::i8) +
StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
SOffset = StackOffset::get(Offset, SOffset.getScalable());
return AArch64FrameOffsetCanUpdate |
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
@ -3856,7 +3896,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));

lib/Target/AArch64/AArch64InstrInfo.h

@ -15,7 +15,6 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "AArch64StackOffset.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@ -293,6 +292,13 @@ public:
Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
Register Reg) const override;
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
int64_t &NumBytes,
int64_t &NumPredicateVectors,
int64_t &NumDataVectors);
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
int64_t &ByteSized,
int64_t &VGSized);
#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

lib/Target/AArch64/AArch64RegisterInfo.cpp

@ -15,7 +15,6 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64StackOffset.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@ -525,7 +524,7 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Register BaseReg,
int64_t Offset) const {
assert(MI && "Unable to get the legal offset for nil instruction.");
StackOffset SaveOffset(Offset, MVT::i8);
StackOffset SaveOffset = StackOffset::getFixed(Offset);
return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
}
@ -556,7 +555,7 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
// ARM doesn't need the general 64-bit offsets
StackOffset Off(Offset, MVT::i8);
StackOffset Off = StackOffset::getFixed(Offset);
unsigned i = 0;
@ -619,9 +618,9 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
/*PreferFP=*/true,
/*ForSimm=*/false);
Offset += StackOffset(MI.getOperand(FIOperandNum + 1).getImm(), MVT::i8);
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getBytes());
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
return;
}
@ -637,12 +636,11 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// TAGPstack must use the virtual frame register in its 3rd operand.
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
Offset = {MFI.getObjectOffset(FrameIndex) +
AFI->getTaggedBasePointerOffset(),
MVT::i8};
Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
AFI->getTaggedBasePointerOffset());
} else if (Tagged) {
StackOffset SPOffset = {
MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8};
StackOffset SPOffset = StackOffset::getFixed(
MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
if (MFI.hasVarSizedObjects() ||
isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
(AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
@ -663,8 +661,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
FrameReg = AArch64::SP;
Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(),
MVT::i8};
Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
(int64_t)MFI.getStackSize());
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);

lib/Target/AArch64/AArch64StackOffset.h

@ -1,151 +0,0 @@
//==--AArch64StackOffset.h ---------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the StackOffset class, which is used to
// describe scalable and non-scalable offsets during frame lowering.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TypeSize.h"
#include <cassert>
namespace llvm {
/// StackOffset is a wrapper around scalable and non-scalable offsets and is
/// used in several functions such as 'isAArch64FrameOffsetLegal' and
/// 'emitFrameOffset()'. StackOffsets are described by MVTs, e.g.
//
/// StackOffset(1, MVT::nxv16i8)
//
/// would describe an offset as being the size of a single SVE vector.
///
/// The class also implements simple arithmetic (addition/subtraction) on these
/// offsets, e.g.
//
/// StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64)
//
/// describes an offset that spans the combined storage required for an SVE
/// vector and a 64bit GPR.
class StackOffset {
int64_t Bytes;
int64_t ScalableBytes;
explicit operator int() const;
public:
using Part = std::pair<int64_t, MVT>;
StackOffset() : Bytes(0), ScalableBytes(0) {}
StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() {
assert(MVT(T).isByteSized() && "Offset type is not a multiple of bytes");
*this += Part(Offset, T);
}
StackOffset(const StackOffset &Other)
: Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {}
StackOffset &operator=(const StackOffset &) = default;
StackOffset &operator+=(const StackOffset::Part &Other) {
const TypeSize Size = Other.second.getSizeInBits();
if (Size.isScalable())
ScalableBytes += Other.first * ((int64_t)Size.getKnownMinSize() / 8);
else
Bytes += Other.first * ((int64_t)Size.getFixedSize() / 8);
return *this;
}
StackOffset &operator+=(const StackOffset &Other) {
Bytes += Other.Bytes;
ScalableBytes += Other.ScalableBytes;
return *this;
}
StackOffset operator+(const StackOffset &Other) const {
StackOffset Res(*this);
Res += Other;
return Res;
}
StackOffset &operator-=(const StackOffset &Other) {
Bytes -= Other.Bytes;
ScalableBytes -= Other.ScalableBytes;
return *this;
}
StackOffset operator-(const StackOffset &Other) const {
StackOffset Res(*this);
Res -= Other;
return Res;
}
StackOffset operator-() const {
StackOffset Res = {};
const StackOffset Other(*this);
Res -= Other;
return Res;
}
/// Returns the scalable part of the offset in bytes.
int64_t getScalableBytes() const { return ScalableBytes; }
/// Returns the non-scalable part of the offset in bytes.
int64_t getBytes() const { return Bytes; }
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors,
int64_t &NumDataVectors) const {
assert(isValid() && "Invalid frame offset");
NumBytes = Bytes;
NumDataVectors = 0;
NumPredicateVectors = ScalableBytes / 2;
// This method is used to get the offsets to adjust the frame offset.
// If the function requires ADDPL to be used and needs more than two ADDPL
// instructions, part of the offset is folded into NumDataVectors so that it
// uses ADDVL for part of it, reducing the number of ADDPL instructions.
if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
NumPredicateVectors > 62) {
NumDataVectors = NumPredicateVectors / 8;
NumPredicateVectors -= NumDataVectors * 8;
}
}
void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const {
assert(isValid() && "Invalid frame offset");
// VGSized offsets are divided by '2', because the VG register is the
// the number of 64bit granules as opposed to 128bit vector chunks,
// which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
// So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
// VG = n * 2 and the dwarf offset must be VG * 8 bytes.
ByteSized = Bytes;
VGSized = ScalableBytes / 2;
}
/// Returns whether the offset is known zero.
explicit operator bool() const { return Bytes || ScalableBytes; }
bool isValid() const {
// The smallest scalable element supported by scaled SVE addressing
// modes are predicates, which are 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
return ScalableBytes % 2 == 0;
}
};
} // end namespace llvm
#endif

unittests/Target/AArch64/CMakeLists.txt

@ -19,5 +19,5 @@ set(LLVM_LINK_COMPONENTS
add_llvm_target_unittest(AArch64Tests
InstSizes.cpp
TestStackOffset.cpp
DecomposeStackOffsetTest.cpp
)

unittests/Target/AArch64/DecomposeStackOffsetTest.cpp

@ -0,0 +1,53 @@
//===- TestStackOffset.cpp - StackOffset unit tests------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/TypeSize.h"
#include "AArch64InstrInfo.h"
#include "gtest/gtest.h"
using namespace llvm;
TEST(StackOffset, decomposeStackOffsetForFrameOffsets) {
StackOffset A = StackOffset::getFixed(8);
StackOffset B = StackOffset::getFixed(4);
StackOffset C = StackOffset::getScalable(16);
// If all offsets can be materialized with only ADDVL,
// make sure PLSized is 0.
int64_t ByteSized, VLSized, PLSized;
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(A + B + C, ByteSized, PLSized,
VLSized);
EXPECT_EQ(12, ByteSized);
EXPECT_EQ(1, VLSized);
EXPECT_EQ(0, PLSized);
// If we need an ADDPL to materialize the offset, and the number of scalable
// bytes fits the ADDPL immediate, fold the scalable bytes to fit in PLSized.
StackOffset D = StackOffset::getScalable(2);
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(C + D, ByteSized, PLSized, VLSized);
EXPECT_EQ(0, ByteSized);
EXPECT_EQ(0, VLSized);
EXPECT_EQ(9, PLSized);
StackOffset E = StackOffset::getScalable(64);
StackOffset F = StackOffset::getScalable(2);
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(E + F, ByteSized, PLSized, VLSized);
EXPECT_EQ(0, ByteSized);
EXPECT_EQ(0, VLSized);
EXPECT_EQ(33, PLSized);
// If the offset requires an ADDPL instruction to materialize, and would
// require more than two instructions, decompose it into both
// ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions.
StackOffset G = StackOffset::getScalable(128);
StackOffset H = StackOffset::getScalable(2);
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(G + H, ByteSized, PLSized, VLSized);
EXPECT_EQ(0, ByteSized);
EXPECT_EQ(8, VLSized);
EXPECT_EQ(1, PLSized);
}

unittests/Target/AArch64/TestStackOffset.cpp

@ -1,131 +0,0 @@
//===- TestStackOffset.cpp - StackOffset unit tests------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AArch64StackOffset.h"
#include "gtest/gtest.h"
using namespace llvm;
TEST(StackOffset, MixedSize) {
StackOffset A(1, MVT::i8);
EXPECT_EQ(1, A.getBytes());
StackOffset B(2, MVT::i32);
EXPECT_EQ(8, B.getBytes());
StackOffset C(2, MVT::v4i64);
EXPECT_EQ(64, C.getBytes());
StackOffset D(2, MVT::nxv4i64);
EXPECT_EQ(64, D.getScalableBytes());
StackOffset E(2, MVT::v4i64);
EXPECT_EQ(0, E.getScalableBytes());
StackOffset F(2, MVT::nxv4i64);
EXPECT_EQ(0, F.getBytes());
}
TEST(StackOffset, Add) {
StackOffset A(1, MVT::i64);
StackOffset B(1, MVT::i32);
StackOffset C = A + B;
EXPECT_EQ(12, C.getBytes());
StackOffset D(1, MVT::i32);
D += A;
EXPECT_EQ(12, D.getBytes());
StackOffset E(1, MVT::nxv1i32);
StackOffset F = C + E;
EXPECT_EQ(12, F.getBytes());
EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, Sub) {
StackOffset A(1, MVT::i64);
StackOffset B(1, MVT::i32);
StackOffset C = A - B;
EXPECT_EQ(4, C.getBytes());
StackOffset D(1, MVT::i64);
D -= A;
EXPECT_EQ(0, D.getBytes());
C += StackOffset(2, MVT::nxv1i32);
StackOffset E = StackOffset(1, MVT::nxv1i32);
StackOffset F = C - E;
EXPECT_EQ(4, F.getBytes());
EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, isZero) {
StackOffset A(0, MVT::i64);
StackOffset B(0, MVT::i32);
EXPECT_TRUE(!A);
EXPECT_TRUE(!(A + B));
StackOffset C(0, MVT::nxv1i32);
EXPECT_TRUE(!(A + C));
StackOffset D(1, MVT::nxv1i32);
EXPECT_FALSE(!(A + D));
}
TEST(StackOffset, isValid) {
EXPECT_FALSE(StackOffset(1, MVT::nxv8i1).isValid());
EXPECT_TRUE(StackOffset(2, MVT::nxv8i1).isValid());
#ifndef NDEBUG
#ifdef GTEST_HAS_DEATH_TEST
EXPECT_DEATH(StackOffset(1, MVT::i1),
"Offset type is not a multiple of bytes");
EXPECT_DEATH(StackOffset(1, MVT::nxv1i1),
"Offset type is not a multiple of bytes");
#endif // defined GTEST_HAS_DEATH_TEST
#endif // not defined NDEBUG
}
TEST(StackOffset, getForFrameOffset) {
StackOffset A(1, MVT::i64);
StackOffset B(1, MVT::i32);
StackOffset C(1, MVT::nxv4i32);
// If all offsets can be materialized with only ADDVL,
// make sure PLSized is 0.
int64_t ByteSized, VLSized, PLSized;
(A + B + C).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(12, ByteSized);
EXPECT_EQ(1, VLSized);
EXPECT_EQ(0, PLSized);
// If we need an ADDPL to materialize the offset, and the number of scalable
// bytes fits the ADDPL immediate, fold the scalable bytes to fit in PLSized.
StackOffset D(1, MVT::nxv16i1);
(C + D).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(0, ByteSized);
EXPECT_EQ(0, VLSized);
EXPECT_EQ(9, PLSized);
StackOffset E(4, MVT::nxv4i32);
StackOffset F(1, MVT::nxv16i1);
(E + F).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(0, ByteSized);
EXPECT_EQ(0, VLSized);
EXPECT_EQ(33, PLSized);
// If the offset requires an ADDPL instruction to materialize, and would
// require more than two instructions, decompose it into both
// ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions.
StackOffset G(8, MVT::nxv4i32);
StackOffset H(1, MVT::nxv16i1);
(G + H).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(0, ByteSized);
EXPECT_EQ(8, VLSized);
EXPECT_EQ(1, PLSized);
}