1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[Statepoints] Operand folding in presence of tied registers.

Implement proper folding of statepoint meta operands (deopt and GC)
when statepoint uses tied registers.
For deopt operands it is just a matter of properly preserving tiedness
in the new instruction.
For tied GC operands folding is a little bit more tricky.
We can fold tied GC operands only from InlineSpiller, because it knows
how to properly reload tied def after it was turned into memory operand.
Other users (e.g. peephole) cannot properly fold such operands as they
do not know how (or when) to reload them from memory.
We do this by untying the operand we want to fold in InlineSpiller
and allowing folding of only untied operands in foldPatchpoint.
This commit is contained in:
Denis Antrushin 2020-06-10 20:01:19 +07:00
parent c3ca06f18c
commit f0a68d4ee0
3 changed files with 240 additions and 5 deletions

View File

@ -810,6 +810,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
bool WasCopy = MI->isCopy();
Register ImpReg;
// TII::foldMemoryOperand will do what we need here for statepoint
// (fold load into use and remove corresponding def). We will replace
// uses of removed def with loads (spillAroundUses).
// For that to work we need to untie def and use to pass it through
// foldMemoryOperand and signal foldPatchpoint that it is allowed to
// fold them.
bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT;
// Spill subregs if the target allows it.
// We always want to spill subregs for stackmap/patchpoint pseudos.
bool SpillSubRegs = TII.isSubregFoldable() ||
@ -829,6 +837,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
continue;
}
if (UntieRegs && MO.isTied())
MI->untieRegOperand(Idx);
if (!SpillSubRegs && MO.getSubReg())
return false;
// We cannot fold a load instruction into a def.

View File

@ -471,6 +471,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
unsigned NumDefs = 0;
switch (MI.getOpcode()) {
case TargetOpcode::STACKMAP: {
// StackMapLiveValues are foldable
@ -486,16 +487,30 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
case TargetOpcode::STATEPOINT: {
// For statepoints, fold deopt and gc arguments, but not call arguments.
StartIdx = StatepointOpers(&MI).getVarIdx();
NumDefs = MI.getNumDefs();
break;
}
default:
llvm_unreachable("unexpected stackmap opcode");
}
unsigned DefToFoldIdx = MI.getNumOperands();
// Return false if any operands requested for folding are not foldable (not
// part of the stackmap's live values).
for (unsigned Op : Ops) {
if (Op < StartIdx)
if (Op < NumDefs) {
assert(DefToFoldIdx == MI.getNumOperands() && "Folding multiple defs");
DefToFoldIdx = Op;
} else if (Op < StartIdx) {
return nullptr;
}
// When called from regalloc (InlineSpiller), operands must be untied,
// and regalloc will take care of (re)loading operand from memory.
// But when called from other places (e.g. peephole pass),
// we cannot fold operands which are tied - callers are unaware they
// need to reload the destination register.
if (MI.getOperand(Op).isTied())
return nullptr;
}
@ -505,11 +520,16 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
// No need to fold return, the meta data, and function arguments
for (unsigned i = 0; i < StartIdx; ++i)
MIB.add(MI.getOperand(i));
if (i != DefToFoldIdx)
MIB.add(MI.getOperand(i));
for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI.getOperand(i);
unsigned TiedTo = e;
(void)MI.isRegTiedToDefOperand(i, &TiedTo);
if (is_contained(Ops, i)) {
assert(TiedTo == e && "Cannot fold tied operands");
unsigned SpillSize;
unsigned SpillOffset;
// Compute the spill slot size and offset.
@ -523,9 +543,15 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MIB.addImm(SpillSize);
MIB.addFrameIndex(FrameIndex);
MIB.addImm(SpillOffset);
}
else
} else {
MIB.add(MO);
if (TiedTo < e) {
assert(TiedTo < NumDefs && "Bad tied operand");
if (TiedTo > DefToFoldIdx)
--TiedTo;
NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1);
}
}
}
return NewMI;
}

View File

@ -0,0 +1,198 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=greedy -o - %s | FileCheck %s
--- |
; ModuleID = 'folding.ll'
source_filename = "folding.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
declare void @func()
define i32 @test_spill(i32 addrspace(1)* %arg00, i32 addrspace(1)* %arg01, i32 addrspace(1)* %arg02, i32 addrspace(1)* %arg03, i32 addrspace(1)* %arg04, i32 addrspace(1)* %arg05, i32 addrspace(1)* %arg06, i32 addrspace(1)* %arg07, i32 addrspace(1)* %arg08) gc "statepoint-example" {
%token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* %arg00, i32 addrspace(1)* %arg01, i32 addrspace(1)* %arg02, i32 addrspace(1)* %arg03, i32 addrspace(1)* %arg04, i32 addrspace(1)* %arg05, i32 addrspace(1)* %arg06, i32 addrspace(1)* %arg07, i32 addrspace(1)* %arg08) ]
%rel00 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 0, i32 0) ; (%arg00, %arg00)
%rel01 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 1, i32 1) ; (%arg01, %arg01)
%rel02 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 2, i32 2) ; (%arg02, %arg02)
%rel03 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 3, i32 3) ; (%arg03, %arg03)
%rel04 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 4, i32 4) ; (%arg04, %arg04)
%rel05 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 5, i32 5) ; (%arg05, %arg05)
%rel06 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 6, i32 6) ; (%arg06, %arg06)
%rel07 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 7, i32 7) ; (%arg07, %arg07)
%rel08 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 8, i32 8) ; (%arg08, %arg08)
%gep00 = getelementptr i32, i32 addrspace(1)* %rel00, i64 1
%gep01 = getelementptr i32, i32 addrspace(1)* %rel01, i64 2
%gep02 = getelementptr i32, i32 addrspace(1)* %rel02, i64 3
%gep03 = getelementptr i32, i32 addrspace(1)* %rel03, i64 4
%gep04 = getelementptr i32, i32 addrspace(1)* %rel04, i64 5
%gep05 = getelementptr i32, i32 addrspace(1)* %rel05, i64 6
%gep06 = getelementptr i32, i32 addrspace(1)* %rel06, i64 7
%gep07 = getelementptr i32, i32 addrspace(1)* %rel07, i64 8
%gep08 = getelementptr i32, i32 addrspace(1)* %rel08, i64 9
%val00 = load i32, i32 addrspace(1)* %gep00, align 4
%val01 = load i32, i32 addrspace(1)* %gep01, align 4
%sum01 = add i32 %val00, %val01
%val02 = load i32, i32 addrspace(1)* %gep02, align 4
%sum02 = add i32 %sum01, %val02
%val03 = load i32, i32 addrspace(1)* %gep03, align 4
%sum03 = add i32 %sum02, %val03
%val04 = load i32, i32 addrspace(1)* %gep04, align 4
%sum04 = add i32 %sum03, %val04
%val05 = load i32, i32 addrspace(1)* %gep05, align 4
%sum05 = add i32 %sum04, %val05
%val06 = load i32, i32 addrspace(1)* %gep06, align 4
%sum06 = add i32 %sum05, %val06
%val07 = load i32, i32 addrspace(1)* %gep07, align 4
%sum07 = add i32 %sum06, %val07
%val08 = load i32, i32 addrspace(1)* %gep08, align 4
%sum08 = add i32 %sum07, %val08
ret i32 %sum08
}
; Function Attrs: nounwind readonly
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32 immarg, i32 immarg) #0
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 immarg, i32 immarg, void ()*, i32 immarg, i32 immarg, ...)
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { nounwind readonly }
attributes #1 = { nounwind }
...
---
name: test_spill
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: gr64, preferred-register: '' }
- { id: 1, class: gr64, preferred-register: '' }
- { id: 2, class: gr64, preferred-register: '' }
- { id: 3, class: gr64, preferred-register: '' }
- { id: 4, class: gr64, preferred-register: '' }
- { id: 5, class: gr64, preferred-register: '' }
- { id: 6, class: gr64, preferred-register: '' }
- { id: 7, class: gr64, preferred-register: '' }
- { id: 8, class: gr64, preferred-register: '' }
- { id: 9, class: gr64, preferred-register: '' }
- { id: 10, class: gr64, preferred-register: '' }
- { id: 11, class: gr64, preferred-register: '' }
- { id: 12, class: gr64, preferred-register: '' }
- { id: 13, class: gr64, preferred-register: '' }
- { id: 14, class: gr64, preferred-register: '' }
- { id: 15, class: gr64, preferred-register: '' }
- { id: 16, class: gr64, preferred-register: '' }
- { id: 17, class: gr64, preferred-register: '' }
- { id: 18, class: gr32, preferred-register: '' }
- { id: 19, class: gr32, preferred-register: '' }
- { id: 20, class: gr32, preferred-register: '' }
- { id: 21, class: gr32, preferred-register: '' }
- { id: 22, class: gr32, preferred-register: '' }
- { id: 23, class: gr32, preferred-register: '' }
- { id: 24, class: gr32, preferred-register: '' }
- { id: 25, class: gr32, preferred-register: '' }
- { id: 26, class: gr32, preferred-register: '' }
liveins:
- { reg: '$rdi', virtual-reg: '%0' }
- { reg: '$rsi', virtual-reg: '%1' }
- { reg: '$rdx', virtual-reg: '%2' }
- { reg: '$rcx', virtual-reg: '%3' }
- { reg: '$r8', virtual-reg: '%4' }
- { reg: '$r9', virtual-reg: '%5' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: true
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack:
- { id: 0, type: default, offset: 16, size: 8, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
liveins: $rdi, $rsi, $rdx, $rcx, $r8, $r9
; CHECK-LABEL: name: test_spill
; CHECK: liveins: $rdi, $rsi, $rdx, $rcx, $r8, $r9
; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $r9 :: (store 8 into %stack.0)
; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $r8 :: (store 8 into %stack.1)
; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.2)
; CHECK: [[R1:%[0-9]+]]:gr64 = COPY $rdx
; CHECK: [[R2:%[0-9]+]]:gr64 = COPY $rsi
; CHECK: [[R3:%[0-9]+]]:gr64 = COPY $rdi
; CHECK: [[R4:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
; CHECK: [[R5:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1)
; CHECK: [[R6:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2, align 16)
; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK: [[R6]]:gr64, [[R5]]:gr64, [[R4]]:gr64, [[R1]]:gr64, [[R2]]:gr64, [[R3]]:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, [[R6]], [[R6]](tied-def 0), [[R5]], [[R5]](tied-def 1), [[R4]], [[R4]](tied-def 2), 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.2, 0, [[R1]], [[R1]](tied-def 3), [[R2]], [[R2]](tied-def 4), [[R3]], [[R3]](tied-def 5), csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on %stack.0), (load store 8 on %stack.1), (load store 8 on %stack.2)
; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK: [[RES:%[0-9]+]]:gr32 = MOV32rm [[R3]], 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R2]], 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R1]], 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1)
; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm]], 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1)
; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm1]], 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1)
; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm2]], 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R4]], 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R5]], 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1)
; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R6]], 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1)
; CHECK: $eax = COPY [[RES]]
; CHECK: RET 0, $eax
%12:gr64 = COPY $r9
%13:gr64 = COPY $r8
%14:gr64 = COPY $rcx
%15:gr64 = COPY $rdx
%16:gr64 = COPY $rsi
%17:gr64 = COPY $rdi
%11:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2, align 16)
%10:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1)
%9:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
%9:gr64, %10:gr64, %11:gr64, %12:gr64, %13:gr64, %14:gr64, %15:gr64, %16:gr64, %17:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, %9, %9(tied-def 0), %10, %10(tied-def 1), %11, %11(tied-def 2), %12, %12(tied-def 3), %13, %13(tied-def 4), %14, %14(tied-def 5), %15, %15(tied-def 6), %16, %16(tied-def 7), %17, %17(tied-def 8), csr_64, implicit-def $rsp, implicit-def $ssp
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
%20:gr32 = MOV32rm %17, 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1)
%20:gr32 = ADD32rm %20, %16, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1)
%20:gr32 = ADD32rm %20, %15, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1)
%20:gr32 = ADD32rm %20, %14, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1)
%20:gr32 = ADD32rm %20, %13, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1)
%20:gr32 = ADD32rm %20, %12, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1)
%20:gr32 = ADD32rm %20, %11, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1)
%20:gr32 = ADD32rm %20, %10, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1)
%20:gr32 = ADD32rm %20, %9, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1)
$eax = COPY %20
RET 0, killed $eax
...