1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

ARM: add pseudo-instructions for lit-pool global materialisation

These are used by MachO only at the moment, and (much like the existing
MOVW/MOVT set) work around the fact that the labels used in the actual
instructions often contain PC-dependent components, which means that repeatedly
materialising the same global can't be CSEed.

With small modifications, it could be adapted to how ELF finds the address of
_GLOBAL_OFFSET_TABLE_, which would give similar benefits in PIC mode there.

llvm-svn: 196090
This commit is contained in:
Tim Northover 2013-12-02 10:35:41 +00:00
parent d153673433
commit 46df9f449d
9 changed files with 179 additions and 81 deletions

View File

@ -1325,6 +1325,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
Opcode == ARM::tLDRpci_pic ||
Opcode == ARM::LDRLIT_ga_pcrel ||
Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
Opcode == ARM::tLDRLIT_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel) {
@ -1338,7 +1341,10 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
if (MO0.getOffset() != MO1.getOffset())
return false;
if (Opcode == ARM::MOV_ga_pcrel ||
if (Opcode == ARM::LDRLIT_ga_pcrel ||
Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
Opcode == ARM::tLDRLIT_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel)
// Ignore the PC labels.

View File

@ -18,11 +18,13 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
#include "llvm/Target/TargetFrameLowering.h"
@ -898,6 +900,59 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
case ARM::LDRLIT_ga_abs:
case ARM::LDRLIT_ga_pcrel:
case ARM::LDRLIT_ga_pcrel_ldr:
case ARM::tLDRLIT_ga_abs:
case ARM::tLDRLIT_ga_pcrel: {
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
const GlobalValue *GV = MO1.getGlobal();
bool IsARM =
Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
bool IsPIC =
Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
unsigned PICAddOpc =
IsARM
? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR)
: ARM::tPICADD;
// We need a new const-pool entry to load from.
MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
unsigned ARMPCLabelIndex = 0;
MachineConstantPoolValue *CPV;
if (IsPIC) {
unsigned PCAdj = IsARM ? 8 : 4;
ARMPCLabelIndex = AFI->createPICLabelUId();
CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex,
ARMCP::CPValue, PCAdj);
} else
CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (IsARM)
MIB.addImm(0);
AddDefaultPred(MIB);
if (IsPIC) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(ARMPCLabelIndex);
if (IsARM)
AddDefaultPred(MIB);
}
MI.eraseFromParent();
return true;
}
case ARM::MOV_ga_pcrel:
case ARM::MOV_ga_pcrel_ldr:
case ARM::t2MOV_ga_pcrel: {

View File

@ -534,8 +534,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
} else
Base = N;
@ -702,8 +701,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@ -963,8 +961,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = CurDAG->getTargetFrameIndex(FI,
getTargetLowering()->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@ -1141,8 +1138,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
if (!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@ -1278,8 +1274,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.

View File

@ -1700,19 +1700,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetDarwin() && "WrapperPIC use on non-Darwin?");
Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
@ -2537,56 +2528,20 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (Subtarget->useMovt()) {
if (Subtarget->useMovt())
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
if (RelocM == Reloc::Static)
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
unsigned Wrapper =
RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into multiple nodes
unsigned Wrapper =
RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(),
false, false, false, 0);
return Result;
}
unsigned ARMPCLabelIndex = 0;
SDValue CPAddr;
if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
} else {
ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
false, false, false, 0);
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}

View File

@ -5182,6 +5182,10 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set GPR:$dst, (arm_i32imm:$src))]>,
Requires<[IsARM]>;
def LDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i,
[(set GPR:$dst, (ARMWrapper tglobaladdr:$src))]>,
Requires<[IsARM, DontUseMovt]>;
// Pseudo instruction that combines movw + movt + add pc (if PIC).
// It also makes it possible to rematerialize the instructions.
// FIXME: Remove this when we can do generalized remat and when machine licm
@ -5192,6 +5196,18 @@ def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
[(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, UseMovt]>;
def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iLoadiALU,
[(set GPR:$dst,
(ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, DontUseMovt]>;
def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
NoItinerary,
[(set GPR:$dst,
(load (ARMWrapperPIC tglobaladdr:$addr)))]>,
Requires<[IsARM, DontUseMovt]>;
let AddedComplexity = 10 in
def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2ld,

View File

@ -1309,6 +1309,19 @@ def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
// ConstantPool
def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
// GlobalAddress
def tLDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iLoadiALU,
[(set GPR:$dst,
(ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsThumb, DontUseMovt]>;
def tLDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i,
[(set GPR:$dst,
(ARMWrapper tglobaladdr:$src))]>,
Requires<[IsThumb, DontUseMovt]>;
// JumpTable
def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
(tLEApcrelJT tjumptable:$dst, imm:$id)>;

View File

@ -0,0 +1,61 @@
; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-PIC
; RUN: llc -mtriple=arm-apple-darwin-eabi -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-ARM-PIC
; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -relocation-model=dynamic-no-pic -o - %s | FileCheck %s --check-prefix=CHECK-DYNAMIC
; RUN: llc -mtriple=arm-apple-darwin-eabi -relocation-model=dynamic-no-pic -o - %s | FileCheck %s --check-prefix=CHECK-DYNAMIC
; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -relocation-model=static -o - %s | FileCheck %s --check-prefix=CHECK-STATIC
; RUN: llc -mtriple=arm-apple-darwin-eabi -relocation-model=static -o - %s | FileCheck %s --check-prefix=CHECK-STATIC
@var = global [16 x i32] zeroinitializer
declare void @bar(i32*)
define void @foo() {
%flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)
%tst = icmp eq i32 %flag, 0
br i1 %tst, label %true, label %false
true:
tail call void @bar(i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 4))
ret void
false:
ret void
}
; CHECK-THUMB-PIC-LABEL: foo:
; CHECK-THUMB-PIC: ldr r0, LCPI0_0
; CHECK-THUMB-PIC: LPC0_0:
; CHECK-THUMB-PIC-NEXT: add r0, pc
; CHECK-THUMB-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
; CHECK-THUMB-PIC: LCPI0_0:
; CHECK-THUMB-PIC-NEXT: .long _var-(LPC0_0+4)
; CHECK-THUMB-PIC-NOT: LCPI0_1
; CHECK-ARM-PIC-LABEL: foo:
; CHECK-ARM-PIC: ldr [[VAR_OFFSET:r[0-9]+]], LCPI0_0
; CHECK-ARM-PIC: LPC0_0:
; CHECK-ARM-PIC-NEXT: ldr r0, [pc, [[VAR_OFFSET]]]
; CHECK-ARM-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
; CHECK-ARM-PIC: LCPI0_0:
; CHECK-ARM-PIC-NEXT: .long _var-(LPC0_0+8)
; CHECK-ARM-PIC-NOT: LCPI0_1
; CHECK-DYNAMIC-LABEL: foo:
; CHECK-DYNAMIC: ldr r0, LCPI0_0
; CHECK-DYNAMIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
; CHECK-DYNAMIC: LCPI0_0:
; CHECK-DYNAMIC-NEXT: .long _var
; CHECK-DYNAMIC-NOT: LCPI0_1
; CHECK-STATIC-LABEL: foo:
; CHECK-STATIC: ldr r0, LCPI0_0
; CHECK-STATIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
; CHECK-STATIC: LCPI0_0:
; CHECK-STATIC-NEXT: .long _var{{$}}
; CHECK-STATIC-NOT: LCPI0_1

View File

@ -11,6 +11,11 @@ define internal i32 @foo(i32 %i) nounwind {
; THUMB-LABEL: foo:
; THUMB2-LABEL: foo:
entry:
; _nextaddr gets CSEed for use later on.
; THUMB: ldr r[[NEXTADDR_REG:[0-9]+]], [[NEXTADDR_CPI:LCPI0_[0-9]+]]
; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]:
; THUMB: add r[[NEXTADDR_REG]], pc
%0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
%1 = icmp eq i8* %0, null ; <i1> [#uses=1]
; indirect branch gets duplicated here
@ -53,12 +58,11 @@ L1: ; preds = %L2, %bb2
; ARM: ldr [[R1:r[0-9]+]], LCPI
; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
; ARM: str [[R1b]]
; THUMB-LABEL: %L1
; THUMB: ldr
; THUMB: add
; THUMB: ldr [[R2:r[0-9]+]], LCPI
; THUMB: add [[R2]], pc
; THUMB: str [[R2]]
; THUMB: str [[R2]], [r[[NEXTADDR_REG]]]
; THUMB2-LABEL: %L1
; THUMB2: ldr [[R2:r[0-9]+]], LCPI
; THUMB2-NEXT: str{{(.w)?}} [[R2]]
@ -67,4 +71,5 @@ L1: ; preds = %L2, %bb2
}
; ARM: .long Ltmp0-(LPC{{.*}}+8)
; THUMB: .long Ltmp0-(LPC{{.*}}+4)
; THUMB: .long _nextaddr-([[NEXTADDR_PCBASE]]+4)
; THUMB2: .long Ltmp0

View File

@ -5,20 +5,12 @@
; rdar://7354376
; rdar://8887598
; The generated code is no where near ideal. It's not recognizing the two
; constantpool entries being loaded can be merged into one.
@GV = external global i32 ; <i32*> [#uses=2]
define void @t(i32* nocapture %vals, i32 %c) nounwind {
entry:
; ARM-LABEL: t:
; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0
; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool.
; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy
; to add the pseudo instructions to make sure they are CSE'ed at the same
; time as the "ldr cp".
; ARM: ldr r{{[0-9]+}}, LCPI0_1
; ARM: LPC0_0:
; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]]
; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
@ -36,7 +28,7 @@ entry:
bb.nph: ; preds = %entry
; ARM: LCPI0_0:
; ARM: LCPI0_1:
; ARM-NOT: LCPI0_1:
; ARM: .section
; THUMB: BB#1