mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
Codegen: LICM: Remove check for exactly 1 register def.
When considering whether to split an instruction with a memory operand into an explicit load and a register-based instruction, we currently check that the resulting instruction has exactly 1 def. This check prevents 2 important LICM optimizations: compares with memory operands, and double indirect calls. All the tests and the test-suite pass without the check. My guess as to the original intent is that the check was meant to limit the additional register pressure created by the new instruction, but given that we only split out a single register, that pressure is already limited. The licm-dominance test now checks actual memory loads for hoisting instead of undef, and it also tests compares. hoist-invariant-load.ll now checks for 2 hoists: the intended hoist, and a bonus hoist from calling a GOT-relative function in a loop. llvm-svn: 273616
This commit is contained in:
parent
02d9fe2d7a
commit
b89a2c2028
@ -1171,7 +1171,6 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
|
||||
&LoadRegIndex);
|
||||
if (NewOpc == 0) return nullptr;
|
||||
const MCInstrDesc &MID = TII->get(NewOpc);
|
||||
if (MID.getNumDefs() != 1) return nullptr;
|
||||
MachineFunction &MF = *MI->getParent()->getParent();
|
||||
const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
|
||||
// Ok, we're unfolding. Create a temporary register and do the unfold.
|
||||
|
@ -1,5 +1,7 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm"
|
||||
; RUN: llc < %s -stats -O2 2>&1 | grep "2 machine-licm"
|
||||
; 2 invariant loads, 1 for OBJC_SELECTOR_REFERENCES_
|
||||
; and 1 for objc_msgSend from the GOT
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.7.2"
|
||||
|
@ -1,36 +1,55 @@
|
||||
; RUN: llc -asm-verbose=true < %s | FileCheck %s
|
||||
|
||||
; MachineLICM should check dominance before hoisting instructions.
|
||||
; only the load of a0 is guaranteed to execute, so only it can be hoisted.
|
||||
; CHECK: movb (%rdi), [[a0reg:%[a-z0-9]+]]
|
||||
; CHECK: ## %for.body.i
|
||||
; CHECK: testb [[a0reg]], [[a0reg]]
|
||||
; CHECK: ## in Loop:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK: cmpb $1, ({{%[a-z0-9]+}})
|
||||
; CHECK: cmpb $2, ({{%[a-z0-9]+}})
|
||||
; CHECK: cmpb $3, ({{%[a-z0-9]+}})
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-macosx10.7.2"
|
||||
|
||||
define void @CMSColorWorldCreateParametricData() nounwind uwtable optsize ssp {
|
||||
define void @CMSColorWorldCreateParametricData(
|
||||
i8* dereferenceable(1) %a0,
|
||||
i8* dereferenceable(1) %a1,
|
||||
i8* dereferenceable(1) %a2,
|
||||
i8* dereferenceable(1) %a3,
|
||||
i64 %count) nounwind uwtable optsize ssp readonly {
|
||||
entry:
|
||||
br label %for.body.i
|
||||
|
||||
for.body.i:
|
||||
br i1 undef, label %for.inc.i, label %if.then26.i
|
||||
for.body.i:
|
||||
%i = phi i64 [0, %entry], [%i.inc, %for.inc.i]
|
||||
%0 = load i8, i8* %a0, !invariant.load !0
|
||||
%cond0 = icmp eq i8 %0, 0
|
||||
br i1 %cond0, label %for.inc.i, label %if.then26.i
|
||||
|
||||
if.then26.i:
|
||||
br i1 undef, label %if.else.i.i, label %lor.lhs.false.i.i
|
||||
if.then26.i:
|
||||
%1 = load i8, i8* %a1, !invariant.load !0
|
||||
%cond1 = icmp eq i8 %1, 1
|
||||
br i1 %cond1, label %if.else.i.i, label %lor.lhs.false.i.i
|
||||
|
||||
if.else.i.i:
|
||||
br i1 undef, label %lor.lhs.false.i.i, label %if.then116.i.i
|
||||
if.else.i.i:
|
||||
%2 = load i8, i8* %a2, !invariant.load !0
|
||||
%cond2 = icmp eq i8 %2, 2
|
||||
br i1 %cond2, label %lor.lhs.false.i.i, label %for.inc.i
|
||||
|
||||
lor.lhs.false.i.i:
|
||||
br i1 undef, label %for.inc.i, label %if.then116.i.i
|
||||
lor.lhs.false.i.i:
|
||||
%3 = load i8, i8* %a3, !invariant.load !0
|
||||
%cond3 = icmp eq i8 %3, 3
|
||||
br i1 %cond3, label %for.inc.i, label %if.end28.i
|
||||
|
||||
if.then116.i.i:
|
||||
unreachable
|
||||
|
||||
for.inc.i:
|
||||
%cmp17.i = icmp ult i64 undef, undef
|
||||
for.inc.i:
|
||||
%i.inc = add nsw i64 %i, 1
|
||||
%cmp17.i = icmp ult i64 %i.inc, %count
|
||||
br i1 %cmp17.i, label %for.body.i, label %if.end28.i
|
||||
|
||||
if.end28.i:
|
||||
if.end28.i:
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{}
|
||||
|
Loading…
Reference in New Issue
Block a user