mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[Peephole] Allow folding loads into instructions w/multiple uses (such as test64rr)
Peephole opt has a one use limitation which appears to be accidental. The function being used was incorrectly documented as returning whether the def had one *user*, but instead returned true only when there was one *use*. Add a corresponding hasOneNonDbgUser helper, and adjust peephole-opt to use the appropriate one. All of the actual folding code handles multiple uses within a single instruction. That codepath is well exercised through instruction selection. Differential Revision: https://reviews.llvm.org/D63656 llvm-svn: 364336
This commit is contained in:
parent
1512834406
commit
dd91c14e21
@ -561,9 +561,14 @@ public:
|
||||
}
|
||||
|
||||
/// hasOneNonDBGUse - Return true if there is exactly one non-Debug
|
||||
/// instruction using the specified register.
|
||||
/// use of the specified register.
|
||||
bool hasOneNonDBGUse(unsigned RegNo) const;
|
||||
|
||||
/// hasOneNonDBGUser - Return true if there is exactly one non-Debug
|
||||
/// instruction using the specified register. Said instruction may have
|
||||
/// multiple uses.
|
||||
bool hasOneNonDBGUser(unsigned RegNo) const;
|
||||
|
||||
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
|
||||
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
|
||||
/// except that it also changes any definitions of the register as well.
|
||||
|
@ -423,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
|
||||
return ++UI == use_nodbg_end();
|
||||
}
|
||||
|
||||
bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
|
||||
use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
|
||||
if (UI == use_instr_nodbg_end())
|
||||
return false;
|
||||
return ++UI == use_instr_nodbg_end();
|
||||
}
|
||||
|
||||
/// clearKillFlags - Iterate over all the uses of the given register and
|
||||
/// clear the kill flag from the MachineOperand. This function is used by
|
||||
/// optimization passes which extend register lifetimes and need only
|
||||
|
@ -1306,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
|
||||
|
||||
/// Check whether MI is a candidate for folding into a later instruction.
|
||||
/// We only fold loads to virtual registers and the virtual register defined
|
||||
/// has a single use.
|
||||
/// has a single user.
|
||||
bool PeepholeOptimizer::isLoadFoldable(
|
||||
MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
|
||||
if (!MI.canFoldAsLoad() || !MI.mayLoad())
|
||||
@ -1316,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable(
|
||||
return false;
|
||||
|
||||
unsigned Reg = MI.getOperand(0).getReg();
|
||||
// To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
|
||||
// To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
|
||||
// loads. It should be checked when processing uses of the load, since
|
||||
// uses can be removed during peephole.
|
||||
if (!MI.getOperand(0).getSubReg() &&
|
||||
TargetRegisterInfo::isVirtualRegister(Reg) &&
|
||||
MRI->hasOneNonDBGUse(Reg)) {
|
||||
MRI->hasOneNonDBGUser(Reg)) {
|
||||
FoldAsLoadDefCandidates.insert(Reg);
|
||||
return true;
|
||||
}
|
||||
|
@ -24,8 +24,7 @@
|
||||
define void @foo(i1 zeroext, i32) nounwind {
|
||||
; X86-LABEL: foo:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: testb %al, %al
|
||||
; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: je .LBB0_1
|
||||
; X86-NEXT: # %bb.3:
|
||||
; X86-NEXT: retl
|
||||
|
@ -8,8 +8,7 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
|
||||
; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp)
|
||||
; JUMP2-NEXT: jl .LBB0_3
|
||||
; JUMP2-NEXT: # %bb.1: # %entry
|
||||
; JUMP2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; JUMP2-NEXT: testl %eax, %eax
|
||||
; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; JUMP2-NEXT: je .LBB0_3
|
||||
; JUMP2-NEXT: # %bb.2: # %UnifiedReturnBlock
|
||||
; JUMP2-NEXT: retl
|
||||
|
88
test/CodeGen/X86/peephole-fold-testrr.mir
Normal file
88
test/CodeGen/X86/peephole-fold-testrr.mir
Normal file
@ -0,0 +1,88 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @atomic(i8** %arg) {
|
||||
%load = load atomic i8*, i8** %arg unordered, align 8
|
||||
%cmp = icmp eq i8* %load, null
|
||||
%zext = zext i1 %cmp to i32
|
||||
ret i32 %zext
|
||||
}
|
||||
|
||||
define i32 @nonatomic_unoptimized(i8** %arg) {
|
||||
%load = load i8*, i8** %arg, align 8
|
||||
%cmp = icmp eq i8* %load, null
|
||||
%zext = zext i1 %cmp to i32
|
||||
ret i32 %zext
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
name: atomic
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64 }
|
||||
- { id: 1, class: gr64 }
|
||||
- { id: 2, class: gr8 }
|
||||
- { id: 3, class: gr32 }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $rdi
|
||||
|
||||
; CHECK-LABEL: name: atomic
|
||||
; CHECK: liveins: $rdi
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load unordered 8 from %ir.arg)
|
||||
; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
|
||||
; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
|
||||
; CHECK: $eax = COPY [[MOVZX32rr8_]]
|
||||
; CHECK: RET 0, $eax
|
||||
%0:gr64 = COPY $rdi
|
||||
%1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.arg)
|
||||
TEST64rr %1, %1, implicit-def $eflags
|
||||
%2:gr8 = SETCCr 4, implicit $eflags
|
||||
%3:gr32 = MOVZX32rr8 killed %2
|
||||
$eax = COPY %3
|
||||
RET 0, $eax
|
||||
|
||||
...
|
||||
---
|
||||
name: nonatomic_unoptimized
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64 }
|
||||
- { id: 1, class: gr64 }
|
||||
- { id: 2, class: gr8 }
|
||||
- { id: 3, class: gr32 }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $rdi
|
||||
|
||||
; CHECK-LABEL: name: nonatomic_unoptimized
|
||||
; CHECK: liveins: $rdi
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 8 from %ir.arg)
|
||||
; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
|
||||
; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
|
||||
; CHECK: $eax = COPY [[MOVZX32rr8_]]
|
||||
; CHECK: RET 0, $eax
|
||||
%0:gr64 = COPY $rdi
|
||||
%1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load 8 from %ir.arg)
|
||||
TEST64rr %1, %1, implicit-def $eflags
|
||||
%2:gr8 = SETCCr 4, implicit $eflags
|
||||
%3:gr32 = MOVZX32rr8 killed %2
|
||||
$eax = COPY %3
|
||||
RET 0, $eax
|
||||
|
||||
...
|
@ -264,8 +264,7 @@ declare i32 @foo4()
|
||||
define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
|
||||
; X86-LABEL: t11:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: je .LBB11_1
|
||||
; X86-NEXT: # %bb.2: # %bb
|
||||
; X86-NEXT: jmp foo5 # TAILCALL
|
||||
@ -311,8 +310,7 @@ declare i32 @foo5(i32, i32, i32, i32, i32)
|
||||
define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
|
||||
; X86-LABEL: t12:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: testl %eax, %eax
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: je .LBB12_1
|
||||
; X86-NEXT: # %bb.2: # %bb
|
||||
; X86-NEXT: jmp foo6 # TAILCALL
|
||||
|
@ -1386,8 +1386,7 @@ define i32 @irreducibleCFG() #4 {
|
||||
; ENABLE-NEXT: jmp LBB16_1
|
||||
; ENABLE-NEXT: LBB16_2: ## %split
|
||||
; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax
|
||||
; ENABLE-NEXT: movl (%rax), %eax
|
||||
; ENABLE-NEXT: testl %eax, %eax
|
||||
; ENABLE-NEXT: cmpl $0, (%rax)
|
||||
; ENABLE-NEXT: je LBB16_3
|
||||
; ENABLE-NEXT: ## %bb.4: ## %for.body4.i
|
||||
; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax
|
||||
@ -1430,8 +1429,7 @@ define i32 @irreducibleCFG() #4 {
|
||||
; DISABLE-NEXT: jmp LBB16_1
|
||||
; DISABLE-NEXT: LBB16_2: ## %split
|
||||
; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax
|
||||
; DISABLE-NEXT: movl (%rax), %eax
|
||||
; DISABLE-NEXT: testl %eax, %eax
|
||||
; DISABLE-NEXT: cmpl $0, (%rax)
|
||||
; DISABLE-NEXT: je LBB16_3
|
||||
; DISABLE-NEXT: ## %bb.4: ## %for.body4.i
|
||||
; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax
|
||||
|
Loading…
Reference in New Issue
Block a user