
GlobalISel: handle stack-based parameters on AArch64.

llvm-svn: 282153
Tim Northover 2016-09-22 13:49:25 +00:00
parent 16cf546b7c
commit 3841e1ae81
6 changed files with 285 additions and 73 deletions

View File

@@ -111,6 +111,16 @@ public:
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildInstr(unsigned Opcode);
/// Build but don't insert <empty> = \p Opcode <empty>.
///
/// \pre setMF, setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode);
/// Insert an existing instruction at the insertion point.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB);
/// Build and insert \p Res<def> = G_FRAME_INDEX \p Idx
///
/// G_FRAME_INDEX materializes the address of an alloca value or other
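The split of buildInstr into buildInstrNoInsert plus insertInstr exists so an instruction can be created detached from any basic block, accumulate operands while other instructions are still being emitted, and only then be placed at the insertion point. A minimal sketch of the intended usage (the callee operand and the particular argument register are illustrative; lowerCall in AArch64CallLowering.cpp below does the real version):

  // Build a floating call, decorate it while arguments are marshalled, then
  // insert it so the copies into argument registers precede the call.
  MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(AArch64::BL);
  MIB.addOperand(Callee);                       // callee operand, as in lowerCall
  MIB.addUse(AArch64::X0, RegState::Implicit);  // a register found during assignment
  MIRBuilder.insertInstr(MIB);                  // finally emit the call itself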

View File

@@ -71,7 +71,16 @@ void MachineIRBuilder::stopRecordingInsertions() {
//------------------------------------------------------------------------------
MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
return insertInstr(buildInstrNoInsert(Opcode));
}
MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode));
return MIB;
}
MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
getMBB().insert(getInsertPt(), MIB);
if (InsertedInstr)
InsertedInstr(MIB);

View File

@@ -32,11 +32,10 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
: CallLowering(&TLI) {
}
bool AArch64CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn,
ArrayRef<ArgInfo> Args,
AssignFnTy AssignValToReg) const {
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = *MF.getFunction();
@@ -49,20 +48,149 @@ bool AArch64CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo))
return false;
}
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// FIXME: Support non-register argument.
if (!VA.isRegLoc())
if (VA.isRegLoc())
Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
else if (VA.isMemLoc()) {
unsigned Size = VA.getValVT().getSizeInBits() / 8;
unsigned Offset = VA.getLocMemOffset();
MachinePointerInfo MPO;
unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
} else {
// FIXME: Support byvals and other weirdness
return false;
// Everything checks out, tell the caller where we've decided this
// parameter/return value should go.
AssignValToReg(MIRBuilder, Args[i].Ty, Args[i].Reg, VA);
}
}
return true;
}
unsigned AArch64CallLowering::ValueHandler::extendRegister(unsigned ValReg,
CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
switch (VA.getLocInfo()) {
default: break;
case CCValAssign::Full:
case CCValAssign::BCvt:
// FIXME: bitconverting between vector types may or may not be a
// nop in big-endian situations.
return ValReg;
case CCValAssign::AExt:
assert(!VA.getLocVT().isVector() && "unexpected vector extend");
// Otherwise, it's a nop.
return ValReg;
case CCValAssign::SExt: {
unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildSExt(NewReg, ValReg);
return NewReg;
}
case CCValAssign::ZExt: {
unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildZExt(NewReg, ValReg);
return NewReg;
}
}
llvm_unreachable("unable to extend register");
}
struct IncomingArgHandler : public AArch64CallLowering::ValueHandler {
IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: ValueHandler(MIRBuilder, MRI) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
auto &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
MIRBuilder.buildFrameIndex(AddrReg, FI);
return AddrReg;
}
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
MIRBuilder.buildCopy(ValVReg, PhysReg);
// FIXME: assert extension
}
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
0);
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
/// How the physical register gets marked varies between formal
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};
struct FormalArgHandler : public IncomingArgHandler {
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: IncomingArgHandler(MIRBuilder, MRI) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
};
struct CallReturnHandler : public IncomingArgHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB)
: IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
}
MachineInstrBuilder MIB;
};
struct OutgoingArgHandler : public AArch64CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB)
: ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
LLT p0 = LLT::pointer(0, 64);
LLT s64 = LLT::scalar(64);
unsigned SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, AArch64::SP);
unsigned OffsetReg = MRI.createGenericVirtualRegister(s64);
MIRBuilder.buildConstant(OffsetReg, Offset);
unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
return AddrReg;
}
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
unsigned ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOStore, Size, 0);
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
MachineInstrBuilder MIB;
};
void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL,
@@ -101,32 +229,6 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
PerformArgSplit(SplitRegs, BitOffsets);
}
static void copyToPhysReg(MachineIRBuilder &MIRBuilder, unsigned ValReg,
CCValAssign &VA, MachineRegisterInfo &MRI) {
LLT LocTy{VA.getLocVT()};
switch (VA.getLocInfo()) {
default: break;
case CCValAssign::AExt:
assert(!VA.getLocVT().isVector() && "unexpected vector extend");
// Otherwise, it's a nop.
break;
case CCValAssign::SExt: {
unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildSExt(NewReg, ValReg);
ValReg = NewReg;
break;
}
case CCValAssign::ZExt: {
unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildZExt(NewReg, ValReg);
ValReg = NewReg;
break;
}
}
MIRBuilder.buildCopy(VA.getLocReg(), ValReg);
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
MachineFunction &MF = MIRBuilder.getMF();
@@ -152,12 +254,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildExtract(Regs, Offsets, VReg);
});
return handleAssignments(MIRBuilder, AssignFn, SplitArgs,
[&](MachineIRBuilder &MIRBuilder, Type *Ty,
unsigned ValReg, CCValAssign &VA) {
copyToPhysReg(MIRBuilder, ValReg, VA, MRI);
MIB.addUse(VA.getLocReg(), RegState::Implicit);
});
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
return handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler);
}
return true;
}
@@ -190,14 +288,8 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs,
[](MachineIRBuilder &MIRBuilder, Type *Ty,
unsigned ValReg, CCValAssign &VA) {
// FIXME: a sign/zeroext loc actually gives
// us an optimization hint. We should use it.
MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
MIRBuilder.buildCopy(ValReg, VA.getLocReg());
}))
FormalArgHandler Handler(MIRBuilder, MRI);
if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler))
return false;
// Move back to the end of the basic block.
@@ -228,27 +320,24 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CCAssignFn *CallAssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
// And finally we can do the actual assignments. For a call we need to keep
// track of the registers used because they'll be implicit uses of the BL.
SmallVector<unsigned, 8> PhysRegs;
if (!handleAssignments(MIRBuilder, CallAssignFn, SplitArgs,
[&](MachineIRBuilder &MIRBuilder, Type *Ty,
unsigned ValReg, CCValAssign &VA) {
copyToPhysReg(MIRBuilder, ValReg, VA, MRI);
PhysRegs.push_back(VA.getLocReg());
}))
return false;
// Now we can build the actual call instruction.
auto MIB = MIRBuilder.buildInstr(Callee.isReg() ? AArch64::BLR : AArch64::BL);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
: AArch64::BL);
MIB.addOperand(Callee);
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget().getRegisterInfo();
MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
for (auto Reg : PhysRegs)
MIB.addUse(Reg, RegState::Implicit);
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
if (!handleAssignments(MIRBuilder, CallAssignFn, SplitArgs, Handler))
return false;
// Now we can add the actual call instruction to the correct basic block.
MIRBuilder.insertInstr(MIB);
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
@@ -267,14 +356,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
std::back_inserter(SplitRegs));
});
if (!handleAssignments(MIRBuilder, RetAssignFn, SplitArgs,
[&](MachineIRBuilder &MIRBuilder, Type *Ty,
unsigned ValReg, CCValAssign &VA) {
// FIXME: a sign/zeroext loc actually gives
// us an optimization hint. We should use it.
MIRBuilder.buildCopy(ValReg, VA.getLocReg());
MIB.addDef(VA.getLocReg(), RegState::Implicit);
}))
CallReturnHandler Handler(MIRBuilder, MRI, MIB);
if (!handleAssignments(MIRBuilder, RetAssignFn, SplitArgs, Handler))
return false;
if (!RegOffsets.empty())

View File

@@ -25,6 +25,46 @@ class AArch64TargetLowering;
class AArch64CallLowering: public CallLowering {
public:
/// Argument handling is mostly uniform between the four places that
/// make these decisions: function formal arguments, call
/// instruction args, call instruction returns and function
/// returns. However, once a decision has been made on where an
/// argument should go, exactly what happens can vary slightly. This
/// class abstracts the differences.
struct ValueHandler {
/// Materialize a VReg containing the address of the specified
/// stack-based object. This is either based on a FrameIndex or
/// direct SP manipulation, depending on the context. \p MPO
/// should be initialized to an appropriate description of the
/// address created.
virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) = 0;
/// The specified value has been assigned to a physical register;
/// handle the appropriate COPY (either to or from) and mark any
/// relevant uses/defines as needed.
virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
CCValAssign &VA) = 0;
/// The specified value has been assigned to a stack
/// location. Load or store it there, with appropriate extension
/// if necessary.
virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
uint64_t Size, MachinePointerInfo &MPO,
CCValAssign &VA) = 0;
unsigned extendRegister(unsigned ValReg, CCValAssign &VA);
ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: MIRBuilder(MIRBuilder), MRI(MRI) {}
virtual ~ValueHandler() {}
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
};
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
@@ -40,7 +80,10 @@ class AArch64CallLowering: public CallLowering {
private:
typedef std::function<void(MachineIRBuilder &, Type *, unsigned,
CCValAssign &)>
AssignFnTy;
RegHandler;
typedef std::function<void(MachineIRBuilder &, int, CCValAssign &)>
MemHandler;
typedef std::function<void(ArrayRef<unsigned>, ArrayRef<uint64_t>)>
SplitArgTy;
@@ -52,7 +95,7 @@ private:
bool handleAssignments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn,
ArrayRef<ArgInfo> Args,
AssignFnTy AssignValToReg) const;
ValueHandler &Callback) const;
};
} // End of namespace llvm;
#endif
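To make the new private interface concrete: a caller constructs whichever ValueHandler subclass fits its situation and hands it to handleAssignments by reference. A condensed sketch of the lowerFormalArguments path from AArch64CallLowering.cpp above (SplitArgs and AssignFn are assumed to have been prepared as in that function):

  // Incoming formal arguments: registers become basic-block live-ins and
  // stack-passed values are loaded from fixed frame-index slots.
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler))
    return false; // e.g. a byval argument that is not handled yet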

View File

@@ -0,0 +1,35 @@
; RUN: llc -mtriple=aarch64-apple-ios -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
; CHECK-LABEL: name: test_stack_slots
; CHECK: fixedStack:
; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 1
; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 1, size: 1
; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
; CHECK: [[RHS:%[0-9]+]](s8) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0)
; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[LHS]], [[RHS]]
; CHECK: [[SUM32:%[0-9]+]](s32) = G_SEXT [[SUM]](s8)
; CHECK: %w0 = COPY [[SUM32]](s32)
define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) {
%sum = add i8 %lhs, %rhs
ret i8 %sum
}
; CHECK-LABEL: name: test_call_stack
; CHECK: [[C42:%[0-9]+]](s8) = G_CONSTANT 42
; CHECK: [[C12:%[0-9]+]](s8) = G_CONSTANT 12
; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT 0
; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64)
; CHECK: G_STORE [[C42]](s8), [[C42_LOC]](p0) :: (store 1 into stack, align 0)
; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
; CHECK: [[C12_OFFS:%[0-9]+]](s64) = G_CONSTANT 1
; CHECK: [[C12_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C12_OFFS]](s64)
; CHECK: G_STORE [[C12]](s8), [[C12_LOC]](p0) :: (store 1 into stack + 1, align 0)
; CHECK: BL @test_stack_slots
define void @test_call_stack() {
call signext i8 @test_stack_slots([8 x i64] undef, i8 signext 42, i8 signext 12)
ret void
}

View File

@@ -141,3 +141,35 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) {
%val = load i8, i8* %addr
ret i8 %val
}
; CHECK-LABEL: name: test_stack_slots
; CHECK: fixedStack:
; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 8
; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 8, size: 8
; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
; CHECK: [[RHS:%[0-9]+]](s64) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
; CHECK: [[SUM:%[0-9]+]](s64) = G_ADD [[LHS]], [[RHS]]
; CHECK: %x0 = COPY [[SUM]](s64)
define i64 @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs) {
%sum = add i64 %lhs, %rhs
ret i64 %sum
}
; CHECK-LABEL: name: test_call_stack
; CHECK: [[C42:%[0-9]+]](s64) = G_CONSTANT 42
; CHECK: [[C12:%[0-9]+]](s64) = G_CONSTANT 12
; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT 0
; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64)
; CHECK: G_STORE [[C42]](s64), [[C42_LOC]](p0) :: (store 8 into stack, align 0)
; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
; CHECK: [[C12_OFFS:%[0-9]+]](s64) = G_CONSTANT 8
; CHECK: [[C12_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C12_OFFS]](s64)
; CHECK: G_STORE [[C12]](s64), [[C12_LOC]](p0) :: (store 8 into stack + 8, align 0)
; CHECK: BL @test_stack_slots
define void @test_call_stack() {
call i64 @test_stack_slots([8 x i64] undef, i64 42, i64 12)
ret void
}