mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible.
In GlobalISel we may in some unfortunate circumstances generate PHIs with operands that are on separate banks. If-conversion doesn't currently check for that case and ends up generating a CSEL on AArch64 with incorrect register operands. Differential Revision: https://reviews.llvm.org/D72961
This commit is contained in:
parent
3c9c875d8d
commit
83ca134320
@ -838,15 +838,17 @@ public:
|
||||
///
|
||||
/// @param MBB Block where select instruction would be inserted.
|
||||
/// @param Cond Condition returned by analyzeBranch.
|
||||
/// @param DstReg Virtual dest register that the result should write to.
|
||||
/// @param TrueReg Virtual register to select when Cond is true.
|
||||
/// @param FalseReg Virtual register to select when Cond is false.
|
||||
/// @param CondCycles Latency from Cond+Branch to select output.
|
||||
/// @param TrueCycles Latency from TrueReg to select output.
|
||||
/// @param FalseCycles Latency from FalseReg to select output.
|
||||
virtual bool canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles, int &FalseCycles) const {
|
||||
ArrayRef<MachineOperand> Cond, unsigned DstReg,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles,
|
||||
int &FalseCycles) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
|
||||
assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
|
||||
|
||||
// Get target information.
|
||||
if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
|
||||
PI.CondCycles, PI.TCycles, PI.FCycles)) {
|
||||
if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
|
||||
PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
|
||||
PI.FCycles)) {
|
||||
LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
|
||||
return false;
|
||||
}
|
||||
|
@ -496,8 +496,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
|
||||
|
||||
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles,
|
||||
unsigned DstReg, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles,
|
||||
int &FalseCycles) const {
|
||||
// Check register classes.
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
@ -506,6 +507,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
if (!RC)
|
||||
return false;
|
||||
|
||||
// Also need to check the dest regclass, in case we're trying to optimize
|
||||
// something like:
|
||||
// %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
|
||||
if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
|
||||
return false;
|
||||
|
||||
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
|
||||
unsigned ExtraCondLat = Cond.size() != 1;
|
||||
|
||||
|
@ -191,7 +191,8 @@ public:
|
||||
bool
|
||||
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
|
||||
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
|
||||
unsigned, unsigned, int &, int &, int &) const override;
|
||||
unsigned, unsigned, unsigned, int &, int &,
|
||||
int &) const override;
|
||||
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
const DebugLoc &DL, unsigned DstReg,
|
||||
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
||||
|
@ -2128,8 +2128,8 @@ bool SIInstrInfo::reverseBranchCondition(
|
||||
|
||||
bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles,
|
||||
unsigned DstReg, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles, int &FalseCycles) const {
|
||||
switch (Cond[0].getImm()) {
|
||||
case VCCNZ:
|
||||
|
@ -293,9 +293,8 @@ public:
|
||||
SmallVectorImpl<MachineOperand> &Cond) const override;
|
||||
|
||||
bool canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles,
|
||||
ArrayRef<MachineOperand> Cond, unsigned DstReg,
|
||||
unsigned TrueReg, unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles, int &FalseCycles) const override;
|
||||
|
||||
void insertSelect(MachineBasicBlock &MBB,
|
||||
|
@ -753,9 +753,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
|
||||
|
||||
// Select analysis.
|
||||
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned DstReg, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles, int &FalseCycles) const {
|
||||
if (Cond.size() != 2)
|
||||
return false;
|
||||
|
||||
|
@ -273,7 +273,8 @@ public:
|
||||
|
||||
// Select analysis.
|
||||
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
|
||||
unsigned, unsigned, int &, int &, int &) const override;
|
||||
unsigned, unsigned, unsigned, int &, int &,
|
||||
int &) const override;
|
||||
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
const DebugLoc &DL, unsigned DstReg,
|
||||
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
||||
|
@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
|
||||
|
||||
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Pred,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles,
|
||||
unsigned DstReg, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles,
|
||||
int &FalseCycles) const {
|
||||
// Not all subtargets have LOCR instructions.
|
||||
if (!STI.hasLoadStoreOnCond())
|
||||
|
@ -221,8 +221,9 @@ public:
|
||||
int *BytesAdded = nullptr) const override;
|
||||
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &SrcReg2, int &Mask, int &Value) const override;
|
||||
bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
|
||||
unsigned, unsigned, int&, int&, int&) const override;
|
||||
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
|
||||
unsigned, unsigned, unsigned, int &, int &,
|
||||
int &) const override;
|
||||
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
const DebugLoc &DL, unsigned DstReg,
|
||||
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
||||
|
@ -2826,11 +2826,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
|
||||
return Count;
|
||||
}
|
||||
|
||||
bool X86InstrInfo::
|
||||
canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
|
||||
bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned DstReg, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles, int &FalseCycles) const {
|
||||
// Not all subtargets have cmov instructions.
|
||||
if (!Subtarget.hasCMov())
|
||||
return false;
|
||||
|
@ -306,7 +306,8 @@ public:
|
||||
const DebugLoc &DL,
|
||||
int *BytesAdded = nullptr) const override;
|
||||
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
|
||||
unsigned, unsigned, int &, int &, int &) const override;
|
||||
unsigned, unsigned, unsigned, int &, int &,
|
||||
int &) const override;
|
||||
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
const DebugLoc &DL, unsigned DstReg,
|
||||
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
||||
|
171
test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
Normal file
171
test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
Normal file
@ -0,0 +1,171 @@
|
||||
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
|
||||
--- |
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "arm64-apple-ios13.3.0"
|
||||
define hidden void @phi_operands_regclasses_different() #0 {
|
||||
entry:
|
||||
br i1 undef, label %if.then139.i, label %if.else142.i
|
||||
|
||||
if.then139.i: ; preds = %entry
|
||||
%0 = load double, double* undef, align 8
|
||||
br label %if.end161.i
|
||||
|
||||
if.else142.i: ; preds = %entry
|
||||
%tobool154.i = icmp eq i8 undef, 0
|
||||
br i1 %tobool154.i, label %if.then155.i, label %if.end161.i
|
||||
|
||||
if.then155.i: ; preds = %if.else142.i
|
||||
%add158.i = fadd double undef, 0.000000e+00
|
||||
br label %if.end161.i
|
||||
|
||||
if.end161.i: ; preds = %if.then155.i, %if.else142.i, %if.then139.i
|
||||
%y2.0.i = phi double [ %0, %if.then139.i ], [ 0.000000e+00, %if.else142.i ], [ 0.000000e+00, %if.then155.i ]
|
||||
%add169.i = fadd double 0.000000e+00, %y2.0.i
|
||||
%1 = fmul double undef, %add169.i
|
||||
%add174.i = fsub double undef, %1
|
||||
%2 = call double @llvm.fabs.f64(double %add174.i) #2
|
||||
%cmp.i516.i = fcmp olt double %2, 0x3BC79CA10C924223
|
||||
br i1 %cmp.i516.i, label %if.then.i.i, label %if.end9.i.i
|
||||
|
||||
if.then.i.i: ; preds = %if.end161.i
|
||||
%3 = call double @llvm.fabs.f64(double undef) #2
|
||||
unreachable
|
||||
|
||||
if.end9.i.i: ; preds = %if.end161.i
|
||||
ret void
|
||||
}
|
||||
declare double @llvm.fabs.f64(double) #1
|
||||
declare void @llvm.stackprotector(i8*, i8**) #2
|
||||
|
||||
attributes #0 = { "target-cpu"="apple-a7" }
|
||||
attributes #1 = { nounwind readnone speculatable willreturn }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = !{!"clang"}
|
||||
|
||||
...
|
||||
---
|
||||
name: phi_operands_regclasses_different
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
selected: true
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers:
|
||||
- { id: 0, class: gpr32, preferred-register: '' }
|
||||
- { id: 1, class: _, preferred-register: '' }
|
||||
- { id: 2, class: _, preferred-register: '' }
|
||||
- { id: 3, class: gpr, preferred-register: '' }
|
||||
- { id: 4, class: gpr64, preferred-register: '' }
|
||||
- { id: 5, class: fpr, preferred-register: '' }
|
||||
- { id: 6, class: _, preferred-register: '' }
|
||||
- { id: 7, class: gpr64, preferred-register: '' }
|
||||
- { id: 8, class: gpr64common, preferred-register: '' }
|
||||
- { id: 9, class: gpr64, preferred-register: '' }
|
||||
- { id: 10, class: fpr64, preferred-register: '' }
|
||||
- { id: 11, class: fpr64, preferred-register: '' }
|
||||
- { id: 12, class: fpr, preferred-register: '' }
|
||||
- { id: 13, class: fpr64, preferred-register: '' }
|
||||
- { id: 14, class: fpr, preferred-register: '' }
|
||||
- { id: 15, class: gpr32, preferred-register: '' }
|
||||
- { id: 16, class: _, preferred-register: '' }
|
||||
- { id: 17, class: gpr32, preferred-register: '' }
|
||||
- { id: 18, class: gpr32, preferred-register: '' }
|
||||
- { id: 19, class: gpr32, preferred-register: '' }
|
||||
- { id: 20, class: gpr, preferred-register: '' }
|
||||
- { id: 21, class: fpr64, preferred-register: '' }
|
||||
- { id: 22, class: fpr64, preferred-register: '' }
|
||||
- { id: 23, class: fpr64, preferred-register: '' }
|
||||
- { id: 24, class: fpr64, preferred-register: '' }
|
||||
- { id: 25, class: fpr64, preferred-register: '' }
|
||||
- { id: 26, class: fpr64, preferred-register: '' }
|
||||
- { id: 27, class: fpr64, preferred-register: '' }
|
||||
- { id: 28, class: gpr64, preferred-register: '' }
|
||||
liveins: []
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 1
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
; Here we check that we don't ifcvt to a CSEL that uses GPRs, because
|
||||
; some operands to the PHI have the fpr64 regclass.
|
||||
; CHECK-LABEL: name: phi_operands_regclasses_different
|
||||
; CHECK-NOT: CSELXr
|
||||
bb.1.entry:
|
||||
successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
|
||||
%0:gpr32 = IMPLICIT_DEF
|
||||
%4:gpr64 = IMPLICIT_DEF
|
||||
%8:gpr64common = IMPLICIT_DEF
|
||||
TBNZW %0, 0, %bb.2
|
||||
B %bb.3
|
||||
|
||||
bb.2.if.then139.i:
|
||||
successors: %bb.5(0x80000000)
|
||||
|
||||
%7:gpr64 = LDRXui %8, 0 :: (load 8 from `double* undef`)
|
||||
B %bb.5
|
||||
|
||||
bb.3.if.else142.i:
|
||||
successors: %bb.4(0x40000000), %bb.5(0x40000000)
|
||||
|
||||
%26:fpr64 = FMOVD0
|
||||
%19:gpr32 = COPY $wzr
|
||||
CBNZW %19, %bb.5
|
||||
|
||||
bb.4.if.then155.i:
|
||||
successors: %bb.5(0x80000000)
|
||||
|
||||
%27:fpr64 = FMOVD0
|
||||
|
||||
bb.5.if.end161.i:
|
||||
successors: %bb.6(0x40000000), %bb.7(0x40000000)
|
||||
|
||||
%9:gpr64 = PHI %7, %bb.2, %26, %bb.3, %27, %bb.4
|
||||
%21:fpr64 = COPY %9
|
||||
%25:fpr64 = FMOVD0
|
||||
%10:fpr64 = FADDDrr %25, %21
|
||||
%22:fpr64 = COPY %4
|
||||
%11:fpr64 = FMULDrr %22, %10
|
||||
%23:fpr64 = COPY %4
|
||||
%13:fpr64 = FABD64 %23, %11
|
||||
%28:gpr64 = MOVi64imm 4307583784117748259
|
||||
%24:fpr64 = COPY %28
|
||||
FCMPDrr %13, %24, implicit-def $nzcv
|
||||
%17:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
|
||||
TBNZW %17, 0, %bb.6
|
||||
B %bb.7
|
||||
|
||||
bb.6.if.then.i.i:
|
||||
successors:
|
||||
|
||||
|
||||
bb.7.if.end9.i.i:
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
Loading…
Reference in New Issue
Block a user