mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AArch64] Fold spills of COPY of WZR/XZR
Summary:
In AArch64InstrInfo::foldMemoryOperandImpl, catch more cases where the COPY being spilled is copying from WZR/XZR, but the source register is not in the COPY destination register's regclass.

For example, when spilling:

  %vreg0 = COPY %XZR ; %vreg0:GPR64common

without this change, the code in TargetInstrInfo::foldMemoryOperand() and canFoldCopy() that normally handles cases like this would fail to optimize, since %XZR is not in GPR64common. So the spill code generated would be:

  %vreg0 = COPY %XZR
  STR %vreg0

instead of the new code generated:

  STR %XZR

Reviewers: qcolombet, MatzeB
Subscribers: mcrosier, aemerson, t.p.northover, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D26976
llvm-svn: 288176
This commit is contained in:
parent
6e479ee202
commit
5ed377ecc1
@ -2598,6 +2598,31 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the case where a WZR/XZR copy is being spilled but the destination
|
||||
// register class doesn't contain WZR/XZR. For example:
|
||||
//
|
||||
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
|
||||
//
|
||||
// In this case we can still safely fold away the COPY and generate the
|
||||
// following spill code:
|
||||
//
|
||||
// STRXui %XZR, <fi#0>
|
||||
//
|
||||
if (MI.isFullCopy() && Ops.size() == 1 && Ops[0] == 0) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const MachineOperand &SrcMO = MI.getOperand(1);
|
||||
unsigned SrcReg = SrcMO.getReg();
|
||||
if (SrcReg == AArch64::WZR || SrcReg == AArch64::XZR) {
|
||||
const TargetRegisterInfo &TRI = getRegisterInfo();
|
||||
const TargetRegisterClass &RC = SrcReg == AArch64::WZR
|
||||
? AArch64::GPR32RegClass
|
||||
: AArch64::GPR64RegClass;
|
||||
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
|
||||
&RC, &TRI);
|
||||
return &*--InsertPt;
|
||||
}
|
||||
}
|
||||
|
||||
// Cannot fold.
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -28,3 +28,57 @@ define void @test_sp(i32 %val) {
|
||||
ret void
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
declare i32 @bar()
|
||||
declare i32 @baz()
|
||||
|
||||
; Check that the spill of the zero value gets stored directly instead
|
||||
; of being copied from wzr and then stored.
; The function merges the constant 0 and the result of @bar in a phi, then
; keeps the merged value live across an inline asm that clobbers the
; general-purpose registers.
|
||||
define i32 @test_zr_spill_copyprop1(i1 %c) {
|
||||
; CHECK-LABEL: test_zr_spill_copyprop1:
|
||||
entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.else:
|
||||
; Non-zero path: the result of @bar is expected to be stored from w0 to a
; stack slot immediately after the call; the slot offset is captured as SLOT.
; CHECK: bl bar
|
||||
; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%call1 = tail call i32 @bar()
|
||||
br label %if.end
|
||||
|
||||
if.then:
|
||||
; Zero path: the phi input from this block is the constant 0, and the
; expected store writes wzr straight into the same SLOT after the call.
; CHECK: bl baz
|
||||
; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
|
||||
%call2 = tail call i32 @baz()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
; %x.0 is 0 when arriving from %if.then and %call1 when arriving from %if.else.
%x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
|
||||
; Empty inline asm whose clobber list names x0-x28, fp, lr and sp; presumably
; this is what forces %x.0 onto the stack across this point.
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
|
||||
ret i32 %x.0
|
||||
}
|
||||
|
||||
; Similar to test_zr_spill_copyprop1, but with mis-matched register
|
||||
; class between %x.0 and the 0 from %if.then.
; The only IR-level difference from test_zr_spill_copyprop1 is that %x.0 is
; fed through an add before being returned.
; NOTE(review): presumably that add use is what constrains %x.0 to a register
; class that does not contain wzr -- confirm against the AArch64 backend.
|
||||
define i32 @test_zr_spill_copyprop2(i1 %c) {
|
||||
; CHECK-LABEL: test_zr_spill_copyprop2:
|
||||
entry:
|
||||
br i1 %c, label %if.else, label %if.then
|
||||
|
||||
if.else:
|
||||
; Non-zero path: the result of @bar is expected to be stored from w0 to a
; stack slot immediately after the call; the slot offset is captured as SLOT.
; CHECK: bl bar
|
||||
; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
|
||||
%call1 = tail call i32 @bar()
|
||||
br label %if.end
|
||||
|
||||
if.then:
|
||||
; Zero path: the phi input from this block is the constant 0, and the
; expected store writes wzr straight into the same SLOT after the call.
; CHECK: bl baz
|
||||
; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
|
||||
%call2 = tail call i32 @baz()
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
; %x.0 is 0 when arriving from %if.then and %call1 when arriving from %if.else.
%x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
|
||||
; Empty inline asm whose clobber list names x0-x28, fp, lr and sp; presumably
; this is what forces %x.0 onto the stack across this point.
call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
|
||||
; Extra arithmetic use of the merged value, absent from test_zr_spill_copyprop1.
%x.1 = add i32 %x.0, 1
|
||||
ret i32 %x.1
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user