1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

AArch64: make sure jump table entries can reach entire image

This turns all jump table entries into deltas within the target
function because in the small memory model all code & static data must
be in a 4GB block somewhere in memory.

When the entries were a delta between the table location and a basic
block, the 32-bit signed entries were not enough to guarantee
reachability.

https://reviews.llvm.org/D87286
This commit is contained in:
Tim Northover 2020-09-08 11:08:25 +01:00
parent 07e4739f3c
commit bc600a0484
9 changed files with 317 additions and 74 deletions

View File

@ -88,10 +88,8 @@ public:
void emitStartOfAsmFile(Module &M) override;
void emitJumpTableInfo() override;
void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned JTI);
void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
@ -785,33 +783,25 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
emitAlignment(Align(Size));
OutStreamer->emitLabel(GetJTISymbol(JTI));
for (auto *JTBB : JTBBs)
emitJumpTableEntry(MJTI, JTBB, JTI);
}
}
void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned JTI) {
const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
auto AFI = MF->getInfo<AArch64FunctionInfo>();
unsigned Size = AFI->getJumpTableEntrySize(JTI);
if (Size == 4) {
// .word LBB - LJTI
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
Value = MCBinaryExpr::createSub(Value, Base, OutContext);
} else {
// .byte (LBB - LBB) >> 2 (or .hword)
const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI);
const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
Value = MCBinaryExpr::createSub(Value, Base, OutContext);
Value = MCBinaryExpr::createLShr(
Value, MCConstantExpr::create(2, OutContext), OutContext);
}
OutStreamer->emitValue(Value, Size);
for (auto *JTBB : JTBBs) {
const MCExpr *Value =
MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext);
// Each entry is:
// .byte/.hword (LBB - Lbase)>>2
// or plain:
// .word LBB - Lbase
Value = MCBinaryExpr::createSub(Value, Base, OutContext);
if (Size != 4)
Value = MCBinaryExpr::createLShr(
Value, MCConstantExpr::create(2, OutContext), OutContext);
OutStreamer->emitValue(Value, Size);
}
}
}
/// Small jump tables contain an unsigned byte or half, representing the offset
@ -822,9 +812,9 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
///
/// adr xDest, .LBB0_0
/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
/// add xDest, xDest, xScratch, lsl #2
void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
const llvm::MachineInstr &MI) {
/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries)
void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
const llvm::MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register ScratchRegW =
@ -832,33 +822,50 @@ void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
Register TableReg = MI.getOperand(2).getReg();
Register EntryReg = MI.getOperand(3).getReg();
int JTIdx = MI.getOperand(4).getIndex();
bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
int Size = AArch64FI->getJumpTableEntrySize(JTIdx);
// This has to be first because the compression pass bases its reachability
// calculations on the start of the JumpTableDest instruction.
auto Label =
MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
// If we don't already have a symbol to use as the base, use the ADR
// instruction itself.
if (!Label) {
Label = MF->getContext().createTempSymbol();
AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label);
OutStreamer.emitLabel(Label);
}
auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext());
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
.addReg(DestReg)
.addExpr(MCSymbolRefExpr::create(
Label, MF->getContext())));
.addExpr(LabelExpr));
// Load the number of instruction-steps to offset from the label.
unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
unsigned LdrOpcode;
switch (Size) {
case 1: LdrOpcode = AArch64::LDRBBroX; break;
case 2: LdrOpcode = AArch64::LDRHHroX; break;
case 4: LdrOpcode = AArch64::LDRSWroX; break;
default:
llvm_unreachable("Unknown jump table size");
}
EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
.addReg(ScratchRegW)
.addReg(Size == 4 ? ScratchReg : ScratchRegW)
.addReg(TableReg)
.addReg(EntryReg)
.addImm(0)
.addImm(IsByteEntry ? 0 : 1));
.addImm(Size == 1 ? 0 : 1));
// Multiply the steps by 4 and add to the already materialized base label
// address.
// Add to the already materialized base label address, multiplying by 4 if
// compressed.
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
.addReg(DestReg)
.addReg(DestReg)
.addReg(ScratchReg)
.addImm(2));
.addImm(Size == 4 ? 0 : 2));
}
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@ -1256,30 +1263,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
case AArch64::JumpTableDest32: {
// We want:
// ldrsw xScratch, [xTable, xEntry, lsl #2]
// add xDest, xTable, xScratch
unsigned DestReg = MI->getOperand(0).getReg(),
ScratchReg = MI->getOperand(1).getReg(),
TableReg = MI->getOperand(2).getReg(),
EntryReg = MI->getOperand(3).getReg();
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
.addReg(ScratchReg)
.addReg(TableReg)
.addReg(EntryReg)
.addImm(0)
.addImm(1));
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
.addReg(DestReg)
.addReg(TableReg)
.addReg(ScratchReg)
.addImm(0));
return;
}
case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
LowerJumpTableDestSmall(*OutStreamer, *MI);
LowerJumpTableDest(*OutStreamer, *MI);
return;
case AArch64::FMOVH0:

View File

@ -6256,6 +6256,9 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));

View File

@ -692,7 +692,8 @@ def : Pat<(AArch64LOADgot tconstpool:$addr),
// A jump table destination expands to 3 instructions (adr, load, add)
// regardless of entry size, because entries are offsets from a label inside
// the function rather than from the table itself.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
(ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
Sched<[]>;

View File

@ -284,15 +284,14 @@ public:
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
unsigned getJumpTableEntrySize(int Idx) const {
auto It = JumpTableEntryInfo.find(Idx);
if (It != JumpTableEntryInfo.end())
return It->second.first;
return 4;
return JumpTableEntryInfo[Idx].first;
}
MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
return JumpTableEntryInfo.find(Idx)->second.second;
return JumpTableEntryInfo[Idx].second;
}
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
if ((unsigned)Idx >= JumpTableEntryInfo.size())
JumpTableEntryInfo.resize(Idx+1);
JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
}
@ -353,7 +352,7 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
SmallVector<std::pair<unsigned, MCSymbol *>, 2> JumpTableEntryInfo;
};
namespace yaml {

View File

@ -2994,6 +2994,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);

View File

@ -0,0 +1,188 @@
# RUN: llc -run-pass=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s
# JumpTableDest32 uses an `adr` to a temporary label (itself). If duplicated we
# cannot guarantee reachability for any uses after the first.
# CHECK: JumpTableDest32
# CHECK-NOT: JumpTableDest32
--- |
; ModuleID = 'jump-table.ll'
source_filename = "jump-table.ll"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"
define i32 @test_jumptable32(i32 %in, i1 %tst) {
br i1 %tst, label %true, label %false
true: ; preds = %0
call void @foo()
br label %switch
false: ; preds = %0
call void @bar()
br label %switch
lbl1: ; preds = %lbl4, %lbl3, %def, %switch
%merge = phi i32 [ 1, %switch ], [ 0, %def ], [ 4, %lbl3 ], [ 8, %lbl4 ]
ret i32 %merge
switch: ; preds = %false, %true
switch i32 %in, label %def [
i32 0, label %lbl1
i32 1, label %lbl2
i32 2, label %lbl3
i32 4, label %lbl4
]
def: ; preds = %switch
br label %lbl1
lbl2: ; preds = %switch
%1 = call i64 @llvm.aarch64.space(i32 262144, i64 undef)
ret i32 2
lbl3: ; preds = %switch
br label %lbl1
lbl4: ; preds = %switch
br label %lbl1
}
declare void @foo()
declare void @bar()
; Function Attrs: nounwind
declare i64 @llvm.aarch64.space(i32, i64) #0
attributes #0 = { nounwind }
...
---
name: test_jumptable32
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$w0', virtual-reg: '' }
- { reg: '$w1', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 32
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: true
hasCalls: true
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
stack-id: default, callee-saved-register: '$x20', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo:
hasRedZone: false
jumpTable:
kind: label-difference32
entries:
- id: 0
blocks: [ '%bb.9', '%bb.6', '%bb.7', '%bb.5', '%bb.8' ]
body: |
bb.0 (%ir-block.0):
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $w0, $w1, $x19, $x20, $lr
early-clobber $sp = frame-setup STPXpre killed $x20, killed $x19, $sp, -4 :: (store 8 into %stack.3), (store 8 into %stack.2)
frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.1), (store 8 into %stack.0)
frame-setup CFI_INSTRUCTION def_cfa_offset 32
frame-setup CFI_INSTRUCTION offset $w30, -8
frame-setup CFI_INSTRUCTION offset $w29, -16
frame-setup CFI_INSTRUCTION offset $w19, -24
frame-setup CFI_INSTRUCTION offset $w20, -32
renamable $w19 = COPY $w0
TBZW killed renamable $w1, 0, %bb.2
bb.1.true:
successors: %bb.3(0x80000000)
liveins: $w19
BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
B %bb.3
bb.2.false:
successors: %bb.3(0x80000000)
liveins: $w19
BL @bar, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
B %bb.3
bb.3.switch:
successors: %bb.9(0x1c71c71c), %bb.6(0x1c71c71c), %bb.7(0x1c71c71c), %bb.5(0x0e38e38e), %bb.8(0x1c71c71c)
liveins: $w19
renamable $w8 = ORRWrs $wzr, killed renamable $w19, 0, implicit-def $x8
renamable $x9 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
early-clobber renamable $x10, dead early-clobber renamable $x11 = JumpTableDest32 killed renamable $x9, killed renamable $x8, %jump-table.0
BR killed renamable $x10
bb.5.def:
successors: %bb.9(0x80000000)
renamable $w0 = COPY $wzr
B %bb.9
bb.6.lbl2:
successors: %bb.9(0x80000000)
dead $xzr = SPACE 262144, undef renamable $x8
$w0 = MOVi32imm 2
B %bb.9
bb.7.lbl3:
successors: %bb.9(0x80000000)
renamable $w0 = MOVi32imm 4
B %bb.9
bb.8.lbl4:
successors: %bb.9(0x80000000)
renamable $w0 = MOVi32imm 8
bb.9.lbl1:
liveins: $w0
$fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
early-clobber $sp, $x20, $x19 = frame-destroy LDPXpost $sp, 4 :: (load 8 from %stack.3), (load 8 from %stack.2)
RET_ReallyLR implicit $w0
...

View File

@ -11,7 +11,17 @@ define i32 @test_jumptable(i32 %in) {
i32 4, label %lbl4
]
; CHECK-LABEL: test_jumptable:
; CHECK-NOT: ldrb
; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0
; CHECK: [[PCREL_LBL:.Ltmp.*]]:
; CHECK-NEXT: adr [[PCBASE:x[0-9]+]], [[PCREL_LBL]]
; CHECK: ldrsw x[[OFFSET:[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2]
; CHECK: add [[DEST:x[0-9]+]], [[PCBASE]], x[[OFFSET]]
; CHECK: br [[DEST]]
; CHECK: .LJTI0_0:
; CHECK-NEXT: .word .LBB{{.*}}-[[PCREL_LBL]]
def:
ret i32 0

View File

@ -155,3 +155,56 @@ lbl4:
; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTBASE]])>>2
; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTBASE]])>>2
; CHECK-IOS-NOT: .end_data_region
; Compressing just the first table has the opportunity to truncate the vector of
; sizes. Make sure it doesn't.
define i32 @test_twotables(i32 %in1, i32 %in2) {
; CHECK-LABEL: test_twotables:
; CHECK: .LJTI2_0
; CHECK: .LJTI2_1
switch i32 %in1, label %def [
i32 0, label %lbl1
i32 1, label %lbl2
i32 2, label %lbl3
i32 4, label %lbl4
]
def:
ret i32 0
lbl1:
ret i32 1
lbl2:
ret i32 2
lbl3:
ret i32 4
lbl4:
switch i32 %in1, label %def [
i32 0, label %lbl5
i32 1, label %lbl6
i32 2, label %lbl7
i32 4, label %lbl8
]
lbl5:
call i64 @llvm.aarch64.space(i32 262144, i64 undef)
ret i32 1
lbl6:
call i64 @llvm.aarch64.space(i32 262144, i64 undef)
ret i32 2
lbl7:
call i64 @llvm.aarch64.space(i32 262144, i64 undef)
ret i32 4
lbl8:
call i64 @llvm.aarch64.space(i32 262144, i64 undef)
ret i32 8
}
declare i64 @llvm.aarch64.space(i32, i64)

View File

@ -40,10 +40,10 @@ declare void @g(i32, i32)
; CHECK-NEXT: .seh_endfunclet
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LJTI0_0:
; CHECK: .word .LBB0_2-.LJTI0_0
; CHECK: .word .LBB0_3-.LJTI0_0
; CHECK: .word .LBB0_4-.LJTI0_0
; CHECK: .word .LBB0_5-.LJTI0_0
; CHECK: .word .LBB0_2-.Ltmp0
; CHECK: .word .LBB0_3-.Ltmp0
; CHECK: .word .LBB0_4-.Ltmp0
; CHECK: .word .LBB0_5-.Ltmp0
; CHECK: .seh_handlerdata
; CHECK: .text
; CHECK: .seh_endproc