1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible.

In GlobalISel we may in some unfortunate circumstances generate PHIs with
operands that are on separate banks. If-conversion doesn't currently check for
that case and ends up generating a CSEL on AArch64 with incorrect register
operands.

Differential Revision: https://reviews.llvm.org/D72961
This commit is contained in:
Amara Emerson 2020-01-17 14:34:26 -08:00
parent 3c9c875d8d
commit 83ca134320
13 changed files with 213 additions and 27 deletions

View File

@ -838,15 +838,17 @@ public:
///
/// @param MBB Block where select instruction would be inserted.
/// @param Cond Condition returned by analyzeBranch.
/// @param DstReg Virtual dest register that the result should write to.
/// @param TrueReg Virtual register to select when Cond is true.
/// @param FalseReg Virtual register to select when Cond is false.
/// @param CondCycles Latency from Cond+Branch to select output.
/// @param TrueCycles Latency from TrueReg to select output.
/// @param FalseCycles Latency from FalseReg to select output.
virtual bool canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
ArrayRef<MachineOperand> Cond, unsigned DstReg,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles,
int &FalseCycles) const {
return false;
}

View File

@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
// Get target information.
if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
PI.CondCycles, PI.TCycles, PI.FCycles)) {
if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
PI.FCycles)) {
LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
return false;
}

View File

@ -496,8 +496,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles,
unsigned DstReg, unsigned TrueReg,
unsigned FalseReg, int &CondCycles,
int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@ -506,6 +507,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (!RC)
return false;
// Also need to check the dest regclass, in case we're trying to optimize
// something like:
// %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
return false;
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
unsigned ExtraCondLat = Cond.size() != 1;

View File

@ -191,7 +191,8 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int &, int &, int &) const override;
unsigned, unsigned, unsigned, int &, int &,
int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,

View File

@ -2128,8 +2128,8 @@ bool SIInstrInfo::reverseBranchCondition(
bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles,
unsigned DstReg, unsigned TrueReg,
unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
switch (Cond[0].getImm()) {
case VCCNZ:

View File

@ -293,9 +293,8 @@ public:
SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles,
ArrayRef<MachineOperand> Cond, unsigned DstReg,
unsigned TrueReg, unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const override;
void insertSelect(MachineBasicBlock &MBB,

View File

@ -753,9 +753,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
ArrayRef<MachineOperand> Cond,
unsigned DstReg, unsigned TrueReg,
unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
if (Cond.size() != 2)
return false;

View File

@ -273,7 +273,8 @@ public:
// Select analysis.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int &, int &, int &) const override;
unsigned, unsigned, unsigned, int &, int &,
int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,

View File

@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles,
unsigned DstReg, unsigned TrueReg,
unsigned FalseReg, int &CondCycles,
int &TrueCycles,
int &FalseCycles) const {
// Not all subtargets have LOCR instructions.
if (!STI.hasLoadStoreOnCond())

View File

@ -221,8 +221,9 @@ public:
int *BytesAdded = nullptr) const override;
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask, int &Value) const override;
bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int&, int&, int&) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, unsigned, int &, int &,
int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,

View File

@ -2826,11 +2826,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
return Count;
}
bool X86InstrInfo::
canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned DstReg, unsigned TrueReg,
unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
if (!Subtarget.hasCMov())
return false;

View File

@ -306,7 +306,8 @@ public:
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int &, int &, int &) const override;
unsigned, unsigned, unsigned, int &, int &,
int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,

View File

@ -0,0 +1,171 @@
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios13.3.0"
define hidden void @phi_operands_regclasses_different() #0 {
entry:
br i1 undef, label %if.then139.i, label %if.else142.i
if.then139.i: ; preds = %entry
%0 = load double, double* undef, align 8
br label %if.end161.i
if.else142.i: ; preds = %entry
%tobool154.i = icmp eq i8 undef, 0
br i1 %tobool154.i, label %if.then155.i, label %if.end161.i
if.then155.i: ; preds = %if.else142.i
%add158.i = fadd double undef, 0.000000e+00
br label %if.end161.i
if.end161.i: ; preds = %if.then155.i, %if.else142.i, %if.then139.i
%y2.0.i = phi double [ %0, %if.then139.i ], [ 0.000000e+00, %if.else142.i ], [ 0.000000e+00, %if.then155.i ]
%add169.i = fadd double 0.000000e+00, %y2.0.i
%1 = fmul double undef, %add169.i
%add174.i = fsub double undef, %1
%2 = call double @llvm.fabs.f64(double %add174.i) #2
%cmp.i516.i = fcmp olt double %2, 0x3BC79CA10C924223
br i1 %cmp.i516.i, label %if.then.i.i, label %if.end9.i.i
if.then.i.i: ; preds = %if.end161.i
%3 = call double @llvm.fabs.f64(double undef) #2
unreachable
if.end9.i.i: ; preds = %if.end161.i
ret void
}
declare double @llvm.fabs.f64(double) #1
declare void @llvm.stackprotector(i8*, i8**) #2
attributes #0 = { "target-cpu"="apple-a7" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
!llvm.ident = !{!0}
!0 = !{!"clang"}
...
---
name: phi_operands_regclasses_different
alignment: 4
exposesReturnsTwice: false
legalized: true
regBankSelected: true
selected: true
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: gpr32, preferred-register: '' }
- { id: 1, class: _, preferred-register: '' }
- { id: 2, class: _, preferred-register: '' }
- { id: 3, class: gpr, preferred-register: '' }
- { id: 4, class: gpr64, preferred-register: '' }
- { id: 5, class: fpr, preferred-register: '' }
- { id: 6, class: _, preferred-register: '' }
- { id: 7, class: gpr64, preferred-register: '' }
- { id: 8, class: gpr64common, preferred-register: '' }
- { id: 9, class: gpr64, preferred-register: '' }
- { id: 10, class: fpr64, preferred-register: '' }
- { id: 11, class: fpr64, preferred-register: '' }
- { id: 12, class: fpr, preferred-register: '' }
- { id: 13, class: fpr64, preferred-register: '' }
- { id: 14, class: fpr, preferred-register: '' }
- { id: 15, class: gpr32, preferred-register: '' }
- { id: 16, class: _, preferred-register: '' }
- { id: 17, class: gpr32, preferred-register: '' }
- { id: 18, class: gpr32, preferred-register: '' }
- { id: 19, class: gpr32, preferred-register: '' }
- { id: 20, class: gpr, preferred-register: '' }
- { id: 21, class: fpr64, preferred-register: '' }
- { id: 22, class: fpr64, preferred-register: '' }
- { id: 23, class: fpr64, preferred-register: '' }
- { id: 24, class: fpr64, preferred-register: '' }
- { id: 25, class: fpr64, preferred-register: '' }
- { id: 26, class: fpr64, preferred-register: '' }
- { id: 27, class: fpr64, preferred-register: '' }
- { id: 28, class: gpr64, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
constants: []
machineFunctionInfo: {}
body: |
; Here we check that we don't ifcvt to a CSEL that uses GPRs, because
; some operands to the PHI have the fpr64 regclass.
; CHECK-LABEL: name: phi_operands_regclasses_different
; CHECK-NOT: CSELXr
bb.1.entry:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
%0:gpr32 = IMPLICIT_DEF
%4:gpr64 = IMPLICIT_DEF
%8:gpr64common = IMPLICIT_DEF
TBNZW %0, 0, %bb.2
B %bb.3
bb.2.if.then139.i:
successors: %bb.5(0x80000000)
%7:gpr64 = LDRXui %8, 0 :: (load 8 from `double* undef`)
B %bb.5
bb.3.if.else142.i:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
%26:fpr64 = FMOVD0
%19:gpr32 = COPY $wzr
CBNZW %19, %bb.5
bb.4.if.then155.i:
successors: %bb.5(0x80000000)
%27:fpr64 = FMOVD0
bb.5.if.end161.i:
successors: %bb.6(0x40000000), %bb.7(0x40000000)
%9:gpr64 = PHI %7, %bb.2, %26, %bb.3, %27, %bb.4
%21:fpr64 = COPY %9
%25:fpr64 = FMOVD0
%10:fpr64 = FADDDrr %25, %21
%22:fpr64 = COPY %4
%11:fpr64 = FMULDrr %22, %10
%23:fpr64 = COPY %4
%13:fpr64 = FABD64 %23, %11
%28:gpr64 = MOVi64imm 4307583784117748259
%24:fpr64 = COPY %28
FCMPDrr %13, %24, implicit-def $nzcv
%17:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
TBNZW %17, 0, %bb.6
B %bb.7
bb.6.if.then.i.i:
successors:
bb.7.if.end9.i.i:
RET_ReallyLR
...